From 274d7e4ac3ccfc55973a9fce6b923a681f7b310f Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Tue, 9 Dec 2025 16:28:05 +0800 Subject: [PATCH 1/9] feat(syscall): add clone3 support --- api/src/syscall/mod.rs | 5 + api/src/syscall/task/clone3.rs | 375 +++++++++++++++++++++++++++++++++ api/src/syscall/task/mod.rs | 5 +- 3 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 api/src/syscall/task/clone3.rs diff --git a/api/src/syscall/mod.rs b/api/src/syscall/mod.rs index a0e0e251..da1ff60a 100644 --- a/api/src/syscall/mod.rs +++ b/api/src/syscall/mod.rs @@ -428,6 +428,11 @@ pub fn handle_syscall(uctx: &mut UserContext) { uctx.arg3(), uctx.arg4(), ), + Sysno::clone3 => sys_clone3( + uctx, + uctx.arg0() as _, // args_ptr + uctx.arg1() as _, // args_size + ), #[cfg(target_arch = "x86_64")] Sysno::fork => sys_fork(uctx), Sysno::exit => sys_exit(uctx.arg0() as _), diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs new file mode 100644 index 00000000..d584d9e1 --- /dev/null +++ b/api/src/syscall/task/clone3.rs @@ -0,0 +1,375 @@ +use alloc::sync::Arc; + +use axerrno::{AxError, AxResult}; +use axfs::FS_CONTEXT; +use axhal::uspace::UserContext; +use axtask::{AxTaskExt, current, spawn_task}; +use bitflags::bitflags; +use kspin::SpinNoIrq; +use linux_raw_sys::general::*; +use starry_core::{ + mm::copy_from_kernel, + task::{AsThread, ProcessData, Thread, add_task_to_table}, +}; +use starry_process::Pid; +use starry_signal::Signo; + +use crate::{ + file::{FD_TABLE, FileLike, PidFd}, + mm::UserPtr, + task::new_user_task, +}; + +bitflags! { + /// Options for use with [`sys_clone3`]. + #[derive(Debug, Clone, Copy, Default)] + struct CloneFlags: u64 { + /// The calling process and the child process run in the same + /// memory space. + const VM = CLONE_VM as u64; + /// The caller and the child process share the same filesystem + /// information. + const FS = CLONE_FS as u64; + /// The calling process and the child process share the same file + /// descriptor table. + const FILES = CLONE_FILES as u64; + /// The calling process and the child process share the same table + /// of signal handlers. + const SIGHAND = CLONE_SIGHAND as u64; + /// Sets pidfd to the child process's PID file descriptor. + const PIDFD = CLONE_PIDFD as u64; + /// If the calling process is being traced, then trace the child + /// also. + const PTRACE = CLONE_PTRACE as u64; + /// The execution of the calling process is suspended until the + /// child releases its virtual memory resources via a call to + /// execve(2) or _exit(2) (as with vfork(2)). + const VFORK = CLONE_VFORK as u64; + /// The parent of the new child (as returned by getppid(2)) + /// will be the same as that of the calling process. + const PARENT = CLONE_PARENT as u64; + /// The child is placed in the same thread group as the calling + /// process. + const THREAD = CLONE_THREAD as u64; + /// The cloned child is started in a new mount namespace. + const NEWNS = CLONE_NEWNS as u64; + /// The child and the calling process share a single list of System + /// V semaphore adjustment values + const SYSVSEM = CLONE_SYSVSEM as u64; + /// The TLS (Thread Local Storage) descriptor is set to tls. + const SETTLS = CLONE_SETTLS as u64; + /// Store the child thread ID in the parent's memory. + const PARENT_SETTID = CLONE_PARENT_SETTID as u64; + /// Clear (zero) the child thread ID in child memory when the child + /// exits, and do a wakeup on the futex at that address. + const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; + /// A tracing process cannot force `CLONE_PTRACE` on this child + /// process. + const UNTRACED = CLONE_UNTRACED as u64; + /// Store the child thread ID in the child's memory. + const CHILD_SETTID = CLONE_CHILD_SETTID as u64; + /// Create the process in a new cgroup namespace. + const NEWCGROUP = CLONE_NEWCGROUP as u64; + /// Create the process in a new UTS namespace. + const NEWUTS = CLONE_NEWUTS as u64; + /// Create the process in a new IPC namespace. + const NEWIPC = CLONE_NEWIPC as u64; + /// Create the process in a new user namespace. + const NEWUSER = CLONE_NEWUSER as u64; + /// Create the process in a new PID namespace. + const NEWPID = CLONE_NEWPID as u64; + /// Create the process in a new network namespace. + const NEWNET = CLONE_NEWNET as u64; + /// The new process shares an I/O context with the calling process. + const IO = CLONE_IO as u64; + /// Clear signal handlers on clone (since Linux 5.5) + const CLEAR_SIGHAND = 0x100000000u64; + /// Clone into specific cgroup (since Linux 5.7) + const INTO_CGROUP = 0x200000000u64; + } +} + +/// Structure passed to clone3() system call +#[repr(C)] +#[derive(Debug, Clone, Copy, Default)] +pub struct CloneArgs { + /// Flags bit mask + pub flags: u64, + /// Where to store PID file descriptor (int *) + pub pidfd: u64, + /// Where to store child TID, in child's memory (pid_t *) + pub child_tid: u64, + /// Where to store child TID, in parent's memory (pid_t *) + pub parent_tid: u64, + /// Signal to deliver to parent on child termination + pub exit_signal: u64, + /// Pointer to lowest byte of stack + pub stack: u64, + /// Size of stack + pub stack_size: u64, + /// Location of new TLS + pub tls: u64, + /// Pointer to a pid_t array (since Linux 5.5) + pub set_tid: u64, + /// Number of elements in set_tid (since Linux 5.5) + pub set_tid_size: u64, + /// File descriptor for target cgroup of child (since Linux 5.7) + pub cgroup: u64, +} + +/// The minimum size of clone_args structure we support +const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; // First 8 fields + +/// Validate clone_args structure and flags +fn validate_clone_args(args: &CloneArgs) -> AxResult<()> { + let flags = CloneFlags::from_bits_truncate(args.flags); + + // Check for unsupported flag combinations + if args.exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { + return Err(AxError::InvalidInput); + } + + // CLONE_THREAD requires CLONE_VM and CLONE_SIGHAND + if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { + return Err(AxError::InvalidInput); + } + + // Validate signal number + if args.exit_signal > 0 && args.exit_signal >= 64 { + return Err(AxError::InvalidInput); + } + + // Validate set_tid_size + if args.set_tid_size > 0 { + warn!("sys_clone3: set_tid/set_tid_size not fully supported, ignoring"); + // In a full implementation, we would validate: + // - set_tid_size <= nested PID namespace depth + // - PIDs in set_tid array are available + } + + // Validate cgroup fd + if args.cgroup > 0 { + warn!("sys_clone3: cgroup parameter not fully supported, ignoring"); + } + + // Namespace flags - stub support + let namespace_flags = CloneFlags::NEWNS + | CloneFlags::NEWIPC + | CloneFlags::NEWNET + | CloneFlags::NEWPID + | CloneFlags::NEWUSER + | CloneFlags::NEWUTS + | CloneFlags::NEWCGROUP; + + if flags.intersects(namespace_flags) { + warn!( + "sys_clone3: namespace flags detected ({:?}), stub support only", + flags & namespace_flags + ); + // Don't return error, just log warning for compatibility + } + + Ok(()) +} + +/// Implementation of clone3 system call +pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxResult { + debug!( + "sys_clone3 <= args_ptr: {:#x}, args_size: {}", + args_ptr, args_size + ); + + // Validate arguments size + if args_size < MIN_CLONE_ARGS_SIZE { + warn!( + "sys_clone3: args_size {} too small, minimum is {}", + args_size, MIN_CLONE_ARGS_SIZE + ); + return Err(AxError::InvalidInput); + } + + // Support larger structures for forward compatibility + if args_size > core::mem::size_of::() { + // Just use what we understand, ignore extra fields + debug!( + "sys_clone3: args_size {} larger than expected {}, using known fields only", + args_size, + core::mem::size_of::() + ); + } + + // Copy clone_args from user space + let args_uptr = UserPtr::::from(args_ptr); + let args = *args_uptr.get_as_mut()?; + + debug!("sys_clone3: args = {:?}", args); + + // Validate arguments + validate_clone_args(&args)?; + + let mut flags = CloneFlags::from_bits_truncate(args.flags); + + // Handle VFORK special case (same as sys_clone) + if flags.contains(CloneFlags::VFORK) { + debug!("sys_clone3: CLONE_VFORK slow path"); + flags.remove(CloneFlags::VM); + } + + debug!("sys_clone3: effective flags: {:?}", flags); + + // Parse exit signal + let exit_signal = if args.exit_signal > 0 { + Signo::from_repr(args.exit_signal as u8) + } else { + None + }; + + // Prepare new user context + let mut new_uctx = *uctx; + + // Set stack pointer if provided + if args.stack > 0 { + if args.stack_size > 0 { + // Stack grows downward, so set SP to stack + stack_size + new_uctx.set_sp((args.stack + args.stack_size) as usize); + } else { + new_uctx.set_sp(args.stack as usize); + } + } + + // Set TLS if requested + if flags.contains(CloneFlags::SETTLS) { + new_uctx.set_tls(args.tls as usize); + } + + // Child returns 0 + new_uctx.set_retval(0); + + // Prepare child_tid pointer if needed + let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) && args.child_tid > 0 { + Some(UserPtr::::from(args.child_tid as usize).get_as_mut()?) + } else { + None + }; + + let curr = current(); + let old_proc_data = &curr.as_thread().proc_data; + + // Create new task + let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); + let tid = new_task.id().as_u64() as Pid; + + // Set parent_tid if requested + if flags.contains(CloneFlags::PARENT_SETTID) && args.parent_tid > 0 { + *UserPtr::::from(args.parent_tid as usize).get_as_mut()? = tid; + } + + // Create process data based on flags + let new_proc_data = if flags.contains(CloneFlags::THREAD) { + // Thread creation: share address space + new_task + .ctx_mut() + .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); + old_proc_data.clone() + } else { + // Process creation: fork or vfork + let proc = if flags.contains(CloneFlags::PARENT) { + old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? + } else { + old_proc_data.proc.clone() + } + .fork(tid); + + // Handle address space + let aspace = if flags.contains(CloneFlags::VM) { + old_proc_data.aspace.clone() + } else { + let mut aspace = old_proc_data.aspace.lock(); + let aspace = aspace.try_clone()?; + copy_from_kernel(&mut aspace.lock())?; + aspace + }; + + new_task + .ctx_mut() + .set_page_table_root(aspace.lock().page_table_root()); + + // Handle signal handlers + let signal_actions = if flags.contains(CloneFlags::SIGHAND) { + old_proc_data.signal.actions.clone() + } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { + // CLONE_CLEAR_SIGHAND: reset to default handlers + Arc::new(SpinNoIrq::new(Default::default())) + } else { + // Normal fork: copy signal handlers + Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) + }; + + let proc_data = ProcessData::new( + proc, + old_proc_data.exe_path.read().clone(), + old_proc_data.cmdline.read().clone(), + aspace, + signal_actions, + exit_signal, + ); + proc_data.set_umask(old_proc_data.umask()); + + // Handle file descriptors and filesystem context + { + let mut scope = proc_data.scope.write(); + + if flags.contains(CloneFlags::FILES) { + // Share file descriptor table + FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); + } else { + // Copy file descriptor table + FD_TABLE + .scope_mut(&mut scope) + .write() + .clone_from(&FD_TABLE.read()); + } + + if flags.contains(CloneFlags::FS) { + // Share filesystem context + FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); + } else { + // Copy filesystem context + FS_CONTEXT + .scope_mut(&mut scope) + .lock() + .clone_from(&FS_CONTEXT.lock()); + } + } + + proc_data + }; + + // Add thread to process + new_proc_data.proc.add_thread(tid); + + // Handle PIDFD if requested + if flags.contains(CloneFlags::PIDFD) && args.pidfd > 0 { + let pidfd = PidFd::new(&new_proc_data); + let fd = pidfd.add_to_fd_table(true)?; + *UserPtr::::from(args.pidfd as usize).get_as_mut()? = fd; + } + + // Create thread object + let thr = Thread::new(tid, new_proc_data); + + // Set clear_child_tid if requested + if flags.contains(CloneFlags::CHILD_CLEARTID) && args.child_tid > 0 { + thr.set_clear_child_tid(args.child_tid as usize); + } + + *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); + + // Spawn the task + let task = spawn_task(new_task); + add_task_to_table(&task); + + debug!("sys_clone3 => child tid: {}", tid); + + Ok(tid as _) +} diff --git a/api/src/syscall/task/mod.rs b/api/src/syscall/task/mod.rs index a6e77afc..2143a0e8 100644 --- a/api/src/syscall/task/mod.rs +++ b/api/src/syscall/task/mod.rs @@ -1,4 +1,5 @@ mod clone; +mod clone3; mod ctl; mod execve; mod exit; @@ -7,4 +8,6 @@ mod schedule; mod thread; mod wait; -pub use self::{clone::*, ctl::*, execve::*, exit::*, job::*, schedule::*, thread::*, wait::*}; +pub use self::{ + clone::*, clone3::*, ctl::*, execve::*, exit::*, job::*, schedule::*, thread::*, wait::*, +}; From b360af61630ddc0a8a4d08585ed8dea4326432e0 Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Sun, 14 Dec 2025 20:05:51 +0800 Subject: [PATCH 2/9] syscall: refactor clone/clone3 to share common do_clone logic --- api/src/syscall/task/clone.rs | 338 +++++++++++++++++++++++----- api/src/syscall/task/clone3.rs | 387 ++++++--------------------------- 2 files changed, 343 insertions(+), 382 deletions(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index 06b50e2c..26f0c0c8 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -21,109 +21,199 @@ use crate::{ }; bitflags! { - /// Options for use with [`sys_clone`]. + /// Clone flags for process/thread creation. #[derive(Debug, Clone, Copy, Default)] - struct CloneFlags: u32 { + pub struct CloneFlags: u64 { /// The calling process and the child process run in the same /// memory space. - const VM = CLONE_VM; - /// The caller and the child process share the same filesystem + const VM = CLONE_VM as u64; + /// The caller and the child process share the same filesystem /// information. - const FS = CLONE_FS; + const FS = CLONE_FS as u64; /// The calling process and the child process share the same file /// descriptor table. - const FILES = CLONE_FILES; + const FILES = CLONE_FILES as u64; /// The calling process and the child process share the same table /// of signal handlers. - const SIGHAND = CLONE_SIGHAND; + const SIGHAND = CLONE_SIGHAND as u64; /// Sets pidfd to the child process's PID file descriptor. - const PIDFD = CLONE_PIDFD; + const PIDFD = CLONE_PIDFD as u64; /// If the calling process is being traced, then trace the child /// also. - const PTRACE = CLONE_PTRACE; + const PTRACE = CLONE_PTRACE as u64; /// The execution of the calling process is suspended until the /// child releases its virtual memory resources via a call to /// execve(2) or _exit(2) (as with vfork(2)). - const VFORK = CLONE_VFORK; - /// The parent of the new child (as returned by getppid(2)) + const VFORK = CLONE_VFORK as u64; + /// The parent of the new child (as returned by getppid(2)) /// will be the same as that of the calling process. - const PARENT = CLONE_PARENT; + const PARENT = CLONE_PARENT as u64; /// The child is placed in the same thread group as the calling /// process. - const THREAD = CLONE_THREAD; + const THREAD = CLONE_THREAD as u64; /// The cloned child is started in a new mount namespace. - const NEWNS = CLONE_NEWNS; + const NEWNS = CLONE_NEWNS as u64; /// The child and the calling process share a single list of System /// V semaphore adjustment values - const SYSVSEM = CLONE_SYSVSEM; + const SYSVSEM = CLONE_SYSVSEM as u64; /// The TLS (Thread Local Storage) descriptor is set to tls. - const SETTLS = CLONE_SETTLS; + const SETTLS = CLONE_SETTLS as u64; /// Store the child thread ID in the parent's memory. - const PARENT_SETTID = CLONE_PARENT_SETTID; + const PARENT_SETTID = CLONE_PARENT_SETTID as u64; /// Clear (zero) the child thread ID in child memory when the child /// exits, and do a wakeup on the futex at that address. - const CHILD_CLEARTID = CLONE_CHILD_CLEARTID; + const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; /// A tracing process cannot force `CLONE_PTRACE` on this child /// process. - const UNTRACED = CLONE_UNTRACED; + const UNTRACED = CLONE_UNTRACED as u64; /// Store the child thread ID in the child's memory. - const CHILD_SETTID = CLONE_CHILD_SETTID; + const CHILD_SETTID = CLONE_CHILD_SETTID as u64; /// Create the process in a new cgroup namespace. - const NEWCGROUP = CLONE_NEWCGROUP; + const NEWCGROUP = CLONE_NEWCGROUP as u64; /// Create the process in a new UTS namespace. - const NEWUTS = CLONE_NEWUTS; + const NEWUTS = CLONE_NEWUTS as u64; /// Create the process in a new IPC namespace. - const NEWIPC = CLONE_NEWIPC; + const NEWIPC = CLONE_NEWIPC as u64; /// Create the process in a new user namespace. - const NEWUSER = CLONE_NEWUSER; + const NEWUSER = CLONE_NEWUSER as u64; /// Create the process in a new PID namespace. - const NEWPID = CLONE_NEWPID; + const NEWPID = CLONE_NEWPID as u64; /// Create the process in a new network namespace. - const NEWNET = CLONE_NEWNET; + const NEWNET = CLONE_NEWNET as u64; /// The new process shares an I/O context with the calling process. - const IO = CLONE_IO; + const IO = CLONE_IO as u64; + /// Clear signal handlers on clone (since Linux 5.5) + const CLEAR_SIGHAND = 0x100000000u64; + /// Clone into specific cgroup (since Linux 5.7) + const INTO_CGROUP = 0x200000000u64; } } -pub fn sys_clone( - uctx: &UserContext, - flags: u32, - stack: usize, - parent_tid: usize, - #[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize, - tls: usize, - #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, -) -> AxResult { - const FLAG_MASK: u32 = 0xff; - let exit_signal = flags & FLAG_MASK; - let mut flags = CloneFlags::from_bits_truncate(flags & !FLAG_MASK); - if flags.contains(CloneFlags::VFORK) { - debug!("sys_clone: CLONE_VFORK slow path"); - flags.remove(CloneFlags::VM); - } +/// Trait for providing clone parameters in a flexible way. +/// +/// This allows clone() and clone3() to have different parameter semantics +/// while sharing the core implementation logic. +pub trait CloneParamProvider { + /// Get clone flags + fn flags(&self) -> CloneFlags; - debug!( - "sys_clone <= flags: {flags:?}, exit_signal: {exit_signal}, stack: {stack:#x}, ptid: \ - {parent_tid:#x}, ctid: {child_tid:#x}, tls: {tls:#x}" - ); + /// Get exit signal (0 means no signal) + fn exit_signal(&self) -> u64; + + /// Get new stack pointer (0 means inherit parent's) + fn stack_pointer(&self) -> usize; + + /// Get TLS value + fn tls(&self) -> usize; + + /// Get child_tid pointer for CHILD_SETTID + fn child_settid_ptr(&self) -> usize; + fn child_cleartid_ptr(&self) -> usize; - if exit_signal != 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { + /// Get parent_tid pointer for PARENT_SETTID (used by both clone and clone3) + fn parent_tid_ptr(&self) -> usize; + + /// Get pidfd pointer (0 if not used) + /// - For clone(): returns 0 (uses parent_tid_ptr instead) + /// - For clone3(): returns the pidfd field + fn pidfd_ptr(&self) -> usize; + + /// Validate parameters (different rules for clone vs clone3) + fn validate(&self) -> AxResult<()>; +} + +/// Common validation logic shared by all clone variants +fn validate_common(flags: CloneFlags, exit_signal: u64) -> AxResult<()> { + // Check for invalid flag combinations + // The original logic is retained here for the time being. + // In the future, it can be ignored and set to 0 simultaneously without reporting an error in some cases. + if exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { return Err(AxError::InvalidInput); } + if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { return Err(AxError::InvalidInput); } - if flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { + + // https://man7.org/linux/man-pages/man2/clone.2.html + // CLONE_SIGHAND + // Since Linux 2.6.0, the flags mask must also include CLONE_VM if CLONE_SIGHAND is specified. + if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { + return Err(AxError::InvalidInput); + } + + if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { + return Err(AxError::InvalidInput); + } + + // Validate exit signal range + if exit_signal >= 64 { return Err(AxError::InvalidInput); } - let exit_signal = Signo::from_repr(exit_signal as u8); + // Namespace flags warning + let namespace_flags = CloneFlags::NEWNS + | CloneFlags::NEWIPC + | CloneFlags::NEWNET + | CloneFlags::NEWPID + | CloneFlags::NEWUSER + | CloneFlags::NEWUTS + | CloneFlags::NEWCGROUP; + + if flags.intersects(namespace_flags) { + warn!( + "sys_clone/sys_clone3: namespace flags detected ({:?}), stub support only", + flags & namespace_flags + ); + } + + Ok(()) +} + +/// Core implementation of clone/clone3/fork/vfork. +/// +/// This function contains the shared logic for creating new tasks. +/// Different parameter semantics are handled through the `CloneParamProvider` trait. +pub fn do_clone(uctx: &UserContext, params: &P) -> AxResult { + // Validate parameters + params.validate()?; + + let mut flags = params.flags(); + let exit_signal = params.exit_signal(); + + // Common validation + validate_common(flags, exit_signal)?; + + // Handle VFORK special case + // NOTE: + // CLONE_VFORK currently shares address space, + // but does NOT suspend parent execution. + // This is a partial implementation. + if flags.contains(CloneFlags::VFORK) { + debug!("do_clone: CLONE_VFORK slow path"); + flags.remove(CloneFlags::VM); + } + + debug!( + "do_clone: flags={flags:?}, exit_signal={exit_signal}, stack={:#x}, tls={:#x}", + params.stack_pointer(), + params.tls() + ); + + let exit_signal = if exit_signal > 0 { + Signo::from_repr(exit_signal as u8) + } else { + None + }; + + // Prepare new user context let mut new_uctx = *uctx; - if stack != 0 { - new_uctx.set_sp(stack); + let stack_ptr = params.stack_pointer(); + if stack_ptr != 0 { + new_uctx.set_sp(stack_ptr); } if flags.contains(CloneFlags::SETTLS) { - new_uctx.set_tls(tls); + new_uctx.set_tls(params.tls()); } new_uctx.set_retval(0); @@ -136,19 +226,27 @@ pub fn sys_clone( let curr = current(); let old_proc_data = &curr.as_thread().proc_data; + // Create new task let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); - let tid = new_task.id().as_u64() as Pid; if flags.contains(CloneFlags::PARENT_SETTID) { (parent_tid as *mut Pid).vm_write(tid).ok(); + + // Write parent TID if PARENT_SETTID is set + let parent_tid_ptr = params.parent_tid_ptr(); + if flags.contains(CloneFlags::PARENT_SETTID) && parent_tid_ptr != 0 { + *UserPtr::::from(parent_tid_ptr).get_as_mut()? = tid; } + // Create process data based on flags (keep original inline logic) let new_proc_data = if flags.contains(CloneFlags::THREAD) { + // Thread creation: share address space new_task .ctx_mut() .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); old_proc_data.clone() } else { + // Process creation let proc = if flags.contains(CloneFlags::PARENT) { old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? } else { @@ -156,6 +254,7 @@ pub fn sys_clone( } .fork(tid); + // Handle address space let aspace = if flags.contains(CloneFlags::VM) { old_proc_data.aspace.clone() } else { @@ -164,15 +263,22 @@ pub fn sys_clone( copy_from_kernel(&mut aspace.lock())?; aspace }; + new_task .ctx_mut() .set_page_table_root(aspace.lock().page_table_root()); + // Handle signal handlers let signal_actions = if flags.contains(CloneFlags::SIGHAND) { old_proc_data.signal.actions.clone() + } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { + // CLONE_CLEAR_SIGHAND: reset to default handlers + Arc::new(SpinNoIrq::new(Default::default())) } else { + // Normal fork: copy signal handlers Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) }; + let proc_data = ProcessData::new( proc, old_proc_data.exe_path.read().clone(), @@ -185,8 +291,10 @@ pub fn sys_clone( // Inherit heap pointers from parent to ensure child's heap state is consistent after fork proc_data.set_heap_top(old_proc_data.get_heap_top()); + // Handle file descriptors and filesystem context { let mut scope = proc_data.scope.write(); + if flags.contains(CloneFlags::FILES) { FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); } else { @@ -209,25 +317,143 @@ pub fn sys_clone( proc_data }; + // Add thread to process new_proc_data.proc.add_thread(tid); + // Handle PIDFD if requested + // Different behavior for clone() vs clone3() if flags.contains(CloneFlags::PIDFD) { let pidfd = PidFd::new(&new_proc_data); (parent_tid as *mut i32).vm_write(pidfd.add_to_fd_table(true)?)?; + let fd = pidfd.add_to_fd_table(true)?; + + // Get the correct pointer based on clone variant + let pidfd_target_ptr = params.pidfd_ptr(); + if pidfd_target_ptr != 0 { + // clone3: write to pidfd field + *UserPtr::::from(pidfd_target_ptr).get_as_mut()? = fd; + } else if parent_tid_ptr != 0 { + // clone: write to parent_tid (historical behavior) + *UserPtr::::from(parent_tid_ptr).get_as_mut()? = fd; + } } + // Create thread object let thr = Thread::new(tid, new_proc_data); - if flags.contains(CloneFlags::CHILD_CLEARTID) { - thr.set_clear_child_tid(child_tid); + + // Set clear_child_tid if requested + let clear_child_tid_ptr = params.child_cleartid_ptr(); + if flags.contains(CloneFlags::CHILD_CLEARTID) && clear_child_tid_ptr != 0 { + thr.set_clear_child_tid(clear_child_tid_ptr); } + *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); + // Spawn the task let task = spawn_task(new_task); add_task_to_table(&task); Ok(tid as _) } +// ================================ +// Clone (legacy) parameters +// ================================ + +/// Parameters for the clone() system call. +/// +/// Note: In clone(), the parent_tid parameter serves dual purpose: +/// - If CLONE_PIDFD: receives the pidfd +/// - If CLONE_PARENT_SETTID: receives the child TID +/// These two flags are mutually exclusive in clone(). +pub struct CloneParams { + flags: u32, + stack: usize, + parent_tid: usize, + child_tid: usize, + tls: usize, +} + +impl CloneParams { + pub fn new(flags: u32, stack: usize, parent_tid: usize, child_tid: usize, tls: usize) -> Self { + Self { + flags, + stack, + parent_tid, + child_tid, + tls, + } + } +} + +impl CloneParamProvider for CloneParams { + fn flags(&self) -> CloneFlags { + const FLAG_MASK: u32 = 0xff; + CloneFlags::from_bits_truncate((self.flags & !FLAG_MASK) as u64) + } + + fn exit_signal(&self) -> u64 { + const FLAG_MASK: u32 = 0xff; + (self.flags & FLAG_MASK) as u64 + } + + fn stack_pointer(&self) -> usize { + // For clone(), stack directly specifies the new SP + self.stack + } + + fn tls(&self) -> usize { + self.tls + } + + fn child_settid_ptr(&self) -> usize { + self.child_tid + } + + fn child_cleartid_ptr(&self) -> usize { + self.child_tid + } + + fn parent_tid_ptr(&self) -> usize { + self.parent_tid + } + + fn pidfd_ptr(&self) -> usize { + // For clone(), PIDFD uses parent_tid, so return 0 here + // The core logic will use parent_tid_ptr() instead + 0 + } + + fn validate(&self) -> AxResult<()> { + let flags = self.flags(); + + // In clone(), PIDFD and PARENT_SETTID are mutually exclusive + // because they share the parent_tid parameter + if flags.contains(CloneFlags::PIDFD) && flags.contains(CloneFlags::PARENT_SETTID) { + return Err(AxError::InvalidInput); + } + + Ok(()) + } +} + +// ================================ +// System call wrappers +// ================================ + +pub fn sys_clone( + uctx: &UserContext, + flags: u32, + stack: usize, + parent_tid: usize, + #[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize, + tls: usize, + #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, +) -> AxResult { + let params = CloneParams::new(flags, stack, parent_tid, child_tid, tls); + do_clone(uctx, ¶ms) +} + #[cfg(target_arch = "x86_64")] pub fn sys_fork(uctx: &UserContext) -> AxResult { sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs index d584d9e1..6183df55 100644 --- a/api/src/syscall/task/clone3.rs +++ b/api/src/syscall/task/clone3.rs @@ -1,375 +1,110 @@ -use alloc::sync::Arc; - use axerrno::{AxError, AxResult}; -use axfs::FS_CONTEXT; use axhal::uspace::UserContext; -use axtask::{AxTaskExt, current, spawn_task}; -use bitflags::bitflags; -use kspin::SpinNoIrq; -use linux_raw_sys::general::*; -use starry_core::{ - mm::copy_from_kernel, - task::{AsThread, ProcessData, Thread, add_task_to_table}, -}; -use starry_process::Pid; -use starry_signal::Signo; - -use crate::{ - file::{FD_TABLE, FileLike, PidFd}, - mm::UserPtr, - task::new_user_task, -}; +use starry_vm::VmPtr; -bitflags! { - /// Options for use with [`sys_clone3`]. - #[derive(Debug, Clone, Copy, Default)] - struct CloneFlags: u64 { - /// The calling process and the child process run in the same - /// memory space. - const VM = CLONE_VM as u64; - /// The caller and the child process share the same filesystem - /// information. - const FS = CLONE_FS as u64; - /// The calling process and the child process share the same file - /// descriptor table. - const FILES = CLONE_FILES as u64; - /// The calling process and the child process share the same table - /// of signal handlers. - const SIGHAND = CLONE_SIGHAND as u64; - /// Sets pidfd to the child process's PID file descriptor. - const PIDFD = CLONE_PIDFD as u64; - /// If the calling process is being traced, then trace the child - /// also. - const PTRACE = CLONE_PTRACE as u64; - /// The execution of the calling process is suspended until the - /// child releases its virtual memory resources via a call to - /// execve(2) or _exit(2) (as with vfork(2)). - const VFORK = CLONE_VFORK as u64; - /// The parent of the new child (as returned by getppid(2)) - /// will be the same as that of the calling process. - const PARENT = CLONE_PARENT as u64; - /// The child is placed in the same thread group as the calling - /// process. - const THREAD = CLONE_THREAD as u64; - /// The cloned child is started in a new mount namespace. - const NEWNS = CLONE_NEWNS as u64; - /// The child and the calling process share a single list of System - /// V semaphore adjustment values - const SYSVSEM = CLONE_SYSVSEM as u64; - /// The TLS (Thread Local Storage) descriptor is set to tls. - const SETTLS = CLONE_SETTLS as u64; - /// Store the child thread ID in the parent's memory. - const PARENT_SETTID = CLONE_PARENT_SETTID as u64; - /// Clear (zero) the child thread ID in child memory when the child - /// exits, and do a wakeup on the futex at that address. - const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; - /// A tracing process cannot force `CLONE_PTRACE` on this child - /// process. - const UNTRACED = CLONE_UNTRACED as u64; - /// Store the child thread ID in the child's memory. - const CHILD_SETTID = CLONE_CHILD_SETTID as u64; - /// Create the process in a new cgroup namespace. - const NEWCGROUP = CLONE_NEWCGROUP as u64; - /// Create the process in a new UTS namespace. - const NEWUTS = CLONE_NEWUTS as u64; - /// Create the process in a new IPC namespace. - const NEWIPC = CLONE_NEWIPC as u64; - /// Create the process in a new user namespace. - const NEWUSER = CLONE_NEWUSER as u64; - /// Create the process in a new PID namespace. - const NEWPID = CLONE_NEWPID as u64; - /// Create the process in a new network namespace. - const NEWNET = CLONE_NEWNET as u64; - /// The new process shares an I/O context with the calling process. - const IO = CLONE_IO as u64; - /// Clear signal handlers on clone (since Linux 5.5) - const CLEAR_SIGHAND = 0x100000000u64; - /// Clone into specific cgroup (since Linux 5.7) - const INTO_CGROUP = 0x200000000u64; - } -} +use super::clone::{CloneFlags, CloneParamProvider, do_clone}; -/// Structure passed to clone3() system call +/// Structure passed to clone3() system call. #[repr(C)] #[derive(Debug, Clone, Copy, Default)] -pub struct CloneArgs { - /// Flags bit mask +pub struct Clone3Args { pub flags: u64, - /// Where to store PID file descriptor (int *) pub pidfd: u64, - /// Where to store child TID, in child's memory (pid_t *) pub child_tid: u64, - /// Where to store child TID, in parent's memory (pid_t *) pub parent_tid: u64, - /// Signal to deliver to parent on child termination pub exit_signal: u64, - /// Pointer to lowest byte of stack pub stack: u64, - /// Size of stack pub stack_size: u64, - /// Location of new TLS pub tls: u64, - /// Pointer to a pid_t array (since Linux 5.5) pub set_tid: u64, - /// Number of elements in set_tid (since Linux 5.5) pub set_tid_size: u64, - /// File descriptor for target cgroup of child (since Linux 5.7) pub cgroup: u64, } -/// The minimum size of clone_args structure we support -const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; // First 8 fields - -/// Validate clone_args structure and flags -fn validate_clone_args(args: &CloneArgs) -> AxResult<()> { - let flags = CloneFlags::from_bits_truncate(args.flags); - - // Check for unsupported flag combinations - if args.exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { - return Err(AxError::InvalidInput); - } - - // CLONE_THREAD requires CLONE_VM and CLONE_SIGHAND - if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { - return Err(AxError::InvalidInput); - } - - // Validate signal number - if args.exit_signal > 0 && args.exit_signal >= 64 { - return Err(AxError::InvalidInput); - } - - // Validate set_tid_size - if args.set_tid_size > 0 { - warn!("sys_clone3: set_tid/set_tid_size not fully supported, ignoring"); - // In a full implementation, we would validate: - // - set_tid_size <= nested PID namespace depth - // - PIDs in set_tid array are available - } +const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; - // Validate cgroup fd - if args.cgroup > 0 { - warn!("sys_clone3: cgroup parameter not fully supported, ignoring"); +impl CloneParamProvider for Clone3Args { + fn flags(&self) -> CloneFlags { + CloneFlags::from_bits_truncate(self.flags) } - // Namespace flags - stub support - let namespace_flags = CloneFlags::NEWNS - | CloneFlags::NEWIPC - | CloneFlags::NEWNET - | CloneFlags::NEWPID - | CloneFlags::NEWUSER - | CloneFlags::NEWUTS - | CloneFlags::NEWCGROUP; - - if flags.intersects(namespace_flags) { - warn!( - "sys_clone3: namespace flags detected ({:?}), stub support only", - flags & namespace_flags - ); - // Don't return error, just log warning for compatibility + fn exit_signal(&self) -> u64 { + self.exit_signal } - Ok(()) -} - -/// Implementation of clone3 system call -pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxResult { - debug!( - "sys_clone3 <= args_ptr: {:#x}, args_size: {}", - args_ptr, args_size - ); - - // Validate arguments size - if args_size < MIN_CLONE_ARGS_SIZE { - warn!( - "sys_clone3: args_size {} too small, minimum is {}", - args_size, MIN_CLONE_ARGS_SIZE - ); - return Err(AxError::InvalidInput); + fn stack_pointer(&self) -> usize { + // For clone3(), stack + stack_size gives the SP + if self.stack > 0 { + if self.stack_size > 0 { + // Stack grows downward, SP = base + size + (self.stack + self.stack_size) as usize + } else { + // If only stack provided, treat as SP directly + self.stack as usize + } + } else { + 0 + } } - // Support larger structures for forward compatibility - if args_size > core::mem::size_of::() { - // Just use what we understand, ignore extra fields - debug!( - "sys_clone3: args_size {} larger than expected {}, using known fields only", - args_size, - core::mem::size_of::() - ); + fn tls(&self) -> usize { + self.tls as usize } - // Copy clone_args from user space - let args_uptr = UserPtr::::from(args_ptr); - let args = *args_uptr.get_as_mut()?; - - debug!("sys_clone3: args = {:?}", args); - - // Validate arguments - validate_clone_args(&args)?; - - let mut flags = CloneFlags::from_bits_truncate(args.flags); - - // Handle VFORK special case (same as sys_clone) - if flags.contains(CloneFlags::VFORK) { - debug!("sys_clone3: CLONE_VFORK slow path"); - flags.remove(CloneFlags::VM); + fn child_settid_ptr(&self) -> usize { + self.child_tid as usize } - debug!("sys_clone3: effective flags: {:?}", flags); - - // Parse exit signal - let exit_signal = if args.exit_signal > 0 { - Signo::from_repr(args.exit_signal as u8) - } else { - None - }; - - // Prepare new user context - let mut new_uctx = *uctx; - - // Set stack pointer if provided - if args.stack > 0 { - if args.stack_size > 0 { - // Stack grows downward, so set SP to stack + stack_size - new_uctx.set_sp((args.stack + args.stack_size) as usize); - } else { - new_uctx.set_sp(args.stack as usize); - } + fn child_cleartid_ptr(&self) -> usize { + self.child_tid as usize // for glibc compatibility } - // Set TLS if requested - if flags.contains(CloneFlags::SETTLS) { - new_uctx.set_tls(args.tls as usize); + fn parent_tid_ptr(&self) -> usize { + self.parent_tid as usize } - // Child returns 0 - new_uctx.set_retval(0); - - // Prepare child_tid pointer if needed - let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) && args.child_tid > 0 { - Some(UserPtr::::from(args.child_tid as usize).get_as_mut()?) - } else { - None - }; - - let curr = current(); - let old_proc_data = &curr.as_thread().proc_data; - - // Create new task - let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); - let tid = new_task.id().as_u64() as Pid; - - // Set parent_tid if requested - if flags.contains(CloneFlags::PARENT_SETTID) && args.parent_tid > 0 { - *UserPtr::::from(args.parent_tid as usize).get_as_mut()? = tid; + fn pidfd_ptr(&self) -> usize { + // For clone3(), pidfd is a separate field + self.pidfd as usize } - // Create process data based on flags - let new_proc_data = if flags.contains(CloneFlags::THREAD) { - // Thread creation: share address space - new_task - .ctx_mut() - .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); - old_proc_data.clone() - } else { - // Process creation: fork or vfork - let proc = if flags.contains(CloneFlags::PARENT) { - old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? - } else { - old_proc_data.proc.clone() + fn validate(&self) -> AxResult<()> { + // Warn about unsupported features + if self.set_tid != 0 || self.set_tid_size != 0 { + warn!("sys_clone3: set_tid/set_tid_size not supported, ignoring"); } - .fork(tid); - - // Handle address space - let aspace = if flags.contains(CloneFlags::VM) { - old_proc_data.aspace.clone() - } else { - let mut aspace = old_proc_data.aspace.lock(); - let aspace = aspace.try_clone()?; - copy_from_kernel(&mut aspace.lock())?; - aspace - }; - - new_task - .ctx_mut() - .set_page_table_root(aspace.lock().page_table_root()); - - // Handle signal handlers - let signal_actions = if flags.contains(CloneFlags::SIGHAND) { - old_proc_data.signal.actions.clone() - } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { - // CLONE_CLEAR_SIGHAND: reset to default handlers - Arc::new(SpinNoIrq::new(Default::default())) - } else { - // Normal fork: copy signal handlers - Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) - }; - - let proc_data = ProcessData::new( - proc, - old_proc_data.exe_path.read().clone(), - old_proc_data.cmdline.read().clone(), - aspace, - signal_actions, - exit_signal, - ); - proc_data.set_umask(old_proc_data.umask()); - - // Handle file descriptors and filesystem context - { - let mut scope = proc_data.scope.write(); - - if flags.contains(CloneFlags::FILES) { - // Share file descriptor table - FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); - } else { - // Copy file descriptor table - FD_TABLE - .scope_mut(&mut scope) - .write() - .clone_from(&FD_TABLE.read()); - } - - if flags.contains(CloneFlags::FS) { - // Share filesystem context - FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); - } else { - // Copy filesystem context - FS_CONTEXT - .scope_mut(&mut scope) - .lock() - .clone_from(&FS_CONTEXT.lock()); - } + if self.cgroup != 0 { + warn!("sys_clone3: cgroup parameter not supported, ignoring"); } - proc_data - }; - - // Add thread to process - new_proc_data.proc.add_thread(tid); + // In clone3(), PIDFD and PARENT_SETTID can coexist + // because they use separate fields (no validation needed) - // Handle PIDFD if requested - if flags.contains(CloneFlags::PIDFD) && args.pidfd > 0 { - let pidfd = PidFd::new(&new_proc_data); - let fd = pidfd.add_to_fd_table(true)?; - *UserPtr::::from(args.pidfd as usize).get_as_mut()? = fd; + Ok(()) } +} - // Create thread object - let thr = Thread::new(tid, new_proc_data); +pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxResult { + debug!("sys_clone3 <= args_ptr: {args_ptr:#x}, args_size: {args_size}"); - // Set clear_child_tid if requested - if flags.contains(CloneFlags::CHILD_CLEARTID) && args.child_tid > 0 { - thr.set_clear_child_tid(args.child_tid as usize); + // Validate size + if args_size < MIN_CLONE_ARGS_SIZE { + warn!("sys_clone3: args_size {args_size} too small, minimum is {MIN_CLONE_ARGS_SIZE}"); + return Err(AxError::InvalidInput); } - *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); + if args_size > core::mem::size_of::() { + debug!("sys_clone3: args_size {args_size} larger than expected, using known fields only"); + } - // Spawn the task - let task = spawn_task(new_task); - add_task_to_table(&task); + // Copy arguments from user space + let args_ptr = args_ptr as *const Clone3Args; + let args = unsafe { args_ptr.vm_read_uninit()?.assume_init() }; + debug!("sys_clone3: args = {args:?}"); - debug!("sys_clone3 => child tid: {}", tid); + // Use common implementation + let result = do_clone(uctx, &args)?; + debug!("sys_clone3 => child tid: {result}"); - Ok(tid as _) + Ok(result) } From 309ec3f5682306bad7fd4617ebb4b9eda84da03a Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Sun, 14 Dec 2025 20:14:03 +0800 Subject: [PATCH 3/9] fix clippy error --- api/src/syscall/task/clone.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index 26f0c0c8..2dc0668b 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -365,7 +365,7 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu /// Note: In clone(), the parent_tid parameter serves dual purpose: /// - If CLONE_PIDFD: receives the pidfd /// - If CLONE_PARENT_SETTID: receives the child TID -/// These two flags are mutually exclusive in clone(). +/// These two flags are mutually exclusive in clone(). pub struct CloneParams { flags: u32, stack: usize, From 1db6013fbea6bc5449c1b8aefadfa692ec6c60ad Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Mon, 26 Jan 2026 20:44:46 +0800 Subject: [PATCH 4/9] syscall: add clone3 support with unified clone argument handling --- api/src/syscall/task/clone.rs | 312 ++++++++++----------------------- api/src/syscall/task/clone3.rs | 84 +++------ 2 files changed, 117 insertions(+), 279 deletions(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index 2dc0668b..ee34ff7f 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -21,50 +21,42 @@ use crate::{ }; bitflags! { - /// Clone flags for process/thread creation. + /// Options for use with [`sys_clone`] and [`sys_clone3`]. #[derive(Debug, Clone, Copy, Default)] pub struct CloneFlags: u64 { - /// The calling process and the child process run in the same - /// memory space. + /// The calling process and the child process run in the same memory space. const VM = CLONE_VM as u64; - /// The caller and the child process share the same filesystem - /// information. + /// The caller and the child process share the same filesystem information. const FS = CLONE_FS as u64; - /// The calling process and the child process share the same file - /// descriptor table. + /// The calling process and the child process share the same file descriptor table. const FILES = CLONE_FILES as u64; - /// The calling process and the child process share the same table - /// of signal handlers. + /// The calling process and the child process share the same table of signal handlers. const SIGHAND = CLONE_SIGHAND as u64; /// Sets pidfd to the child process's PID file descriptor. const PIDFD = CLONE_PIDFD as u64; - /// If the calling process is being traced, then trace the child - /// also. + /// If the calling process is being traced, then trace the child also. const PTRACE = CLONE_PTRACE as u64; - /// The execution of the calling process is suspended until the - /// child releases its virtual memory resources via a call to - /// execve(2) or _exit(2) (as with vfork(2)). + /// The execution of the calling process is suspended until the child releases + /// its virtual memory resources via a call to execve(2) or _exit(2) (as with vfork(2)). const VFORK = CLONE_VFORK as u64; - /// The parent of the new child (as returned by getppid(2)) - /// will be the same as that of the calling process. + /// The parent of the new child (as returned by getppid(2)) will be the same + /// as that of the calling process. const PARENT = CLONE_PARENT as u64; - /// The child is placed in the same thread group as the calling - /// process. + /// The child is placed in the same thread group as the calling process. const THREAD = CLONE_THREAD as u64; /// The cloned child is started in a new mount namespace. const NEWNS = CLONE_NEWNS as u64; - /// The child and the calling process share a single list of System - /// V semaphore adjustment values + /// The child and the calling process share a single list of System V + /// semaphore adjustment values. const SYSVSEM = CLONE_SYSVSEM as u64; /// The TLS (Thread Local Storage) descriptor is set to tls. const SETTLS = CLONE_SETTLS as u64; /// Store the child thread ID in the parent's memory. const PARENT_SETTID = CLONE_PARENT_SETTID as u64; - /// Clear (zero) the child thread ID in child memory when the child - /// exits, and do a wakeup on the futex at that address. + /// Clear (zero) the child thread ID in child memory when the child exits, + /// and do a wakeup on the futex at that address. const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; - /// A tracing process cannot force `CLONE_PTRACE` on this child - /// process. + /// A tracing process cannot force `CLONE_PTRACE` on this child process. const UNTRACED = CLONE_UNTRACED as u64; /// Store the child thread ID in the child's memory. const CHILD_SETTID = CLONE_CHILD_SETTID as u64; @@ -82,76 +74,82 @@ bitflags! { const NEWNET = CLONE_NEWNET as u64; /// The new process shares an I/O context with the calling process. const IO = CLONE_IO as u64; - /// Clear signal handlers on clone (since Linux 5.5) + /// Clear signal handlers on clone (since Linux 5.5). const CLEAR_SIGHAND = 0x100000000u64; - /// Clone into specific cgroup (since Linux 5.7) + /// Clone into specific cgroup (since Linux 5.7). const INTO_CGROUP = 0x200000000u64; } } -/// Trait for providing clone parameters in a flexible way. +/// Unified arguments for clone/clone3/fork/vfork. /// -/// This allows clone() and clone3() to have different parameter semantics -/// while sharing the core implementation logic. -pub trait CloneParamProvider { - /// Get clone flags - fn flags(&self) -> CloneFlags; - - /// Get exit signal (0 means no signal) - fn exit_signal(&self) -> u64; - - /// Get new stack pointer (0 means inherit parent's) - fn stack_pointer(&self) -> usize; - - /// Get TLS value - fn tls(&self) -> usize; - - /// Get child_tid pointer for CHILD_SETTID - fn child_settid_ptr(&self) -> usize; - fn child_cleartid_ptr(&self) -> usize; +/// This structure is used internally to homogenize parameters from different +/// clone syscall variants (clone, clone3, fork, vfork). +#[derive(Debug, Clone, Copy, Default)] +pub struct CloneArgs { + pub flags: CloneFlags, + pub exit_signal: u64, + pub stack: usize, + pub tls: usize, + pub parent_tid: usize, + pub child_tid: usize, + pub pidfd: usize, +} - /// Get parent_tid pointer for PARENT_SETTID (used by both clone and clone3) - fn parent_tid_ptr(&self) -> usize; +impl CloneArgs { + /// Create CloneArgs from clone() syscall parameters. + /// + /// Note: In clone(), the parent_tid parameter serves dual purpose: + /// - If CLONE_PIDFD: receives the pidfd + /// - If CLONE_PARENT_SETTID: receives the child TID + /// These two flags are mutually exclusive. + pub fn from_clone( + raw_flags: u32, + stack: usize, + parent_tid: usize, + child_tid: usize, + tls: usize, + ) -> AxResult { + const FLAG_MASK: u32 = 0xff; + let flags = CloneFlags::from_bits_truncate((raw_flags & !FLAG_MASK) as u64); + let exit_signal = (raw_flags & FLAG_MASK) as u64; - /// Get pidfd pointer (0 if not used) - /// - For clone(): returns 0 (uses parent_tid_ptr instead) - /// - For clone3(): returns the pidfd field - fn pidfd_ptr(&self) -> usize; + if flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { + return Err(AxError::InvalidInput); + } - /// Validate parameters (different rules for clone vs clone3) - fn validate(&self) -> AxResult<()>; + Ok(Self { + flags, + exit_signal, + stack, + tls, + parent_tid, + child_tid, + pidfd: 0, + }) + } } -/// Common validation logic shared by all clone variants -fn validate_common(flags: CloneFlags, exit_signal: u64) -> AxResult<()> { - // Check for invalid flag combinations - // The original logic is retained here for the time being. - // In the future, it can be ignored and set to 0 simultaneously without reporting an error in some cases. +fn validate_common(args: &CloneArgs) -> AxResult<()> { + let flags = args.flags; + let exit_signal = args.exit_signal; + if exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { return Err(AxError::InvalidInput); } - if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { return Err(AxError::InvalidInput); } - - // https://man7.org/linux/man-pages/man2/clone.2.html - // CLONE_SIGHAND - // Since Linux 2.6.0, the flags mask must also include CLONE_VM if CLONE_SIGHAND is specified. if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { return Err(AxError::InvalidInput); } - if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { return Err(AxError::InvalidInput); } - - // Validate exit signal range if exit_signal >= 64 { return Err(AxError::InvalidInput); } - // Namespace flags warning let namespace_flags = CloneFlags::NEWNS | CloneFlags::NEWIPC | CloneFlags::NEWNET @@ -171,33 +169,20 @@ fn validate_common(flags: CloneFlags, exit_signal: u64) -> AxResult<()> { } /// Core implementation of clone/clone3/fork/vfork. -/// -/// This function contains the shared logic for creating new tasks. -/// Different parameter semantics are handled through the `CloneParamProvider` trait. -pub fn do_clone(uctx: &UserContext, params: &P) -> AxResult { - // Validate parameters - params.validate()?; - - let mut flags = params.flags(); - let exit_signal = params.exit_signal(); - - // Common validation - validate_common(flags, exit_signal)?; - - // Handle VFORK special case - // NOTE: - // CLONE_VFORK currently shares address space, - // but does NOT suspend parent execution. - // This is a partial implementation. +pub fn do_clone(uctx: &UserContext, args: CloneArgs) -> AxResult { + validate_common(&args)?; + + let mut flags = args.flags; + let exit_signal = args.exit_signal; + if flags.contains(CloneFlags::VFORK) { debug!("do_clone: CLONE_VFORK slow path"); flags.remove(CloneFlags::VM); } debug!( - "do_clone: flags={flags:?}, exit_signal={exit_signal}, stack={:#x}, tls={:#x}", - params.stack_pointer(), - params.tls() + "do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}", + flags, exit_signal, args.stack, args.tls ); let exit_signal = if exit_signal > 0 { @@ -206,19 +191,17 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu None }; - // Prepare new user context let mut new_uctx = *uctx; - let stack_ptr = params.stack_pointer(); - if stack_ptr != 0 { - new_uctx.set_sp(stack_ptr); + if args.stack != 0 { + new_uctx.set_sp(args.stack); } if flags.contains(CloneFlags::SETTLS) { - new_uctx.set_tls(params.tls()); + new_uctx.set_tls(args.tls); } new_uctx.set_retval(0); let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) { - child_tid + args.child_tid } else { 0 }; @@ -226,27 +209,19 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu let curr = current(); let old_proc_data = &curr.as_thread().proc_data; - // Create new task let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); - let tid = new_task.id().as_u64() as Pid; - if flags.contains(CloneFlags::PARENT_SETTID) { - (parent_tid as *mut Pid).vm_write(tid).ok(); - // Write parent TID if PARENT_SETTID is set - let parent_tid_ptr = params.parent_tid_ptr(); - if flags.contains(CloneFlags::PARENT_SETTID) && parent_tid_ptr != 0 { - *UserPtr::::from(parent_tid_ptr).get_as_mut()? = tid; + let tid = new_task.id().as_u64() as Pid; + if flags.contains(CloneFlags::PARENT_SETTID) && args.parent_tid != 0 { + (args.parent_tid as *mut Pid).vm_write(tid)?; } - // Create process data based on flags (keep original inline logic) let new_proc_data = if flags.contains(CloneFlags::THREAD) { - // Thread creation: share address space new_task .ctx_mut() .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); old_proc_data.clone() } else { - // Process creation let proc = if flags.contains(CloneFlags::PARENT) { old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? } else { @@ -254,7 +229,6 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu } .fork(tid); - // Handle address space let aspace = if flags.contains(CloneFlags::VM) { old_proc_data.aspace.clone() } else { @@ -263,19 +237,15 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu copy_from_kernel(&mut aspace.lock())?; aspace }; - new_task .ctx_mut() .set_page_table_root(aspace.lock().page_table_root()); - // Handle signal handlers let signal_actions = if flags.contains(CloneFlags::SIGHAND) { old_proc_data.signal.actions.clone() } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { - // CLONE_CLEAR_SIGHAND: reset to default handlers Arc::new(SpinNoIrq::new(Default::default())) } else { - // Normal fork: copy signal handlers Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) }; @@ -288,13 +258,10 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu exit_signal, ); proc_data.set_umask(old_proc_data.umask()); - // Inherit heap pointers from parent to ensure child's heap state is consistent after fork proc_data.set_heap_top(old_proc_data.get_heap_top()); - // Handle file descriptors and filesystem context { let mut scope = proc_data.scope.write(); - if flags.contains(CloneFlags::FILES) { FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); } else { @@ -317,130 +284,33 @@ pub fn do_clone(uctx: &UserContext, params: &P) -> AxResu proc_data }; - // Add thread to process new_proc_data.proc.add_thread(tid); - // Handle PIDFD if requested - // Different behavior for clone() vs clone3() if flags.contains(CloneFlags::PIDFD) { let pidfd = PidFd::new(&new_proc_data); - (parent_tid as *mut i32).vm_write(pidfd.add_to_fd_table(true)?)?; let fd = pidfd.add_to_fd_table(true)?; - - // Get the correct pointer based on clone variant - let pidfd_target_ptr = params.pidfd_ptr(); - if pidfd_target_ptr != 0 { - // clone3: write to pidfd field - *UserPtr::::from(pidfd_target_ptr).get_as_mut()? = fd; - } else if parent_tid_ptr != 0 { - // clone: write to parent_tid (historical behavior) - *UserPtr::::from(parent_tid_ptr).get_as_mut()? = fd; + let target = if args.pidfd != 0 { + args.pidfd + } else { + args.parent_tid + }; + if target != 0 { + (target as *mut i32).vm_write(fd)?; } } - // Create thread object let thr = Thread::new(tid, new_proc_data); - - // Set clear_child_tid if requested - let clear_child_tid_ptr = params.child_cleartid_ptr(); - if flags.contains(CloneFlags::CHILD_CLEARTID) && clear_child_tid_ptr != 0 { - thr.set_clear_child_tid(clear_child_tid_ptr); + if flags.contains(CloneFlags::CHILD_CLEARTID) && args.child_tid != 0 { + thr.set_clear_child_tid(args.child_tid); } - *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); - // Spawn the task let task = spawn_task(new_task); add_task_to_table(&task); Ok(tid as _) } -// ================================ -// Clone (legacy) parameters -// ================================ - -/// Parameters for the clone() system call. -/// -/// Note: In clone(), the parent_tid parameter serves dual purpose: -/// - If CLONE_PIDFD: receives the pidfd -/// - If CLONE_PARENT_SETTID: receives the child TID -/// These two flags are mutually exclusive in clone(). -pub struct CloneParams { - flags: u32, - stack: usize, - parent_tid: usize, - child_tid: usize, - tls: usize, -} - -impl CloneParams { - pub fn new(flags: u32, stack: usize, parent_tid: usize, child_tid: usize, tls: usize) -> Self { - Self { - flags, - stack, - parent_tid, - child_tid, - tls, - } - } -} - -impl CloneParamProvider for CloneParams { - fn flags(&self) -> CloneFlags { - const FLAG_MASK: u32 = 0xff; - CloneFlags::from_bits_truncate((self.flags & !FLAG_MASK) as u64) - } - - fn exit_signal(&self) -> u64 { - const FLAG_MASK: u32 = 0xff; - (self.flags & FLAG_MASK) as u64 - } - - fn stack_pointer(&self) -> usize { - // For clone(), stack directly specifies the new SP - self.stack - } - - fn tls(&self) -> usize { - self.tls - } - - fn child_settid_ptr(&self) -> usize { - self.child_tid - } - - fn child_cleartid_ptr(&self) -> usize { - self.child_tid - } - - fn parent_tid_ptr(&self) -> usize { - self.parent_tid - } - - fn pidfd_ptr(&self) -> usize { - // For clone(), PIDFD uses parent_tid, so return 0 here - // The core logic will use parent_tid_ptr() instead - 0 - } - - fn validate(&self) -> AxResult<()> { - let flags = self.flags(); - - // In clone(), PIDFD and PARENT_SETTID are mutually exclusive - // because they share the parent_tid parameter - if flags.contains(CloneFlags::PIDFD) && flags.contains(CloneFlags::PARENT_SETTID) { - return Err(AxError::InvalidInput); - } - - Ok(()) - } -} - -// ================================ -// System call wrappers -// ================================ - pub fn sys_clone( uctx: &UserContext, flags: u32, @@ -450,8 +320,8 @@ pub fn sys_clone( tls: usize, #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, ) -> AxResult { - let params = CloneParams::new(flags, stack, parent_tid, child_tid, tls); - do_clone(uctx, ¶ms) + let args = CloneArgs::from_clone(flags, stack, parent_tid, child_tid, tls)?; + do_clone(uctx, args) } #[cfg(target_arch = "x86_64")] diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs index 6183df55..755d8a70 100644 --- a/api/src/syscall/task/clone3.rs +++ b/api/src/syscall/task/clone3.rs @@ -2,7 +2,7 @@ use axerrno::{AxError, AxResult}; use axhal::uspace::UserContext; use starry_vm::VmPtr; -use super::clone::{CloneFlags, CloneParamProvider, do_clone}; +use super::clone::{CloneArgs, CloneFlags, do_clone}; /// Structure passed to clone3() system call. #[repr(C)] @@ -23,71 +23,42 @@ pub struct Clone3Args { const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; -impl CloneParamProvider for Clone3Args { - fn flags(&self) -> CloneFlags { - CloneFlags::from_bits_truncate(self.flags) - } +impl Clone3Args { + fn into_clone_args(self) -> AxResult { + if self.set_tid != 0 || self.set_tid_size != 0 { + warn!("sys_clone3: set_tid/set_tid_size not supported, ignoring"); + } + if self.cgroup != 0 { + warn!("sys_clone3: cgroup parameter not supported, ignoring"); + } - fn exit_signal(&self) -> u64 { - self.exit_signal - } + let flags = CloneFlags::from_bits_truncate(self.flags); - fn stack_pointer(&self) -> usize { - // For clone3(), stack + stack_size gives the SP - if self.stack > 0 { + let stack = if self.stack > 0 { if self.stack_size > 0 { - // Stack grows downward, SP = base + size (self.stack + self.stack_size) as usize } else { - // If only stack provided, treat as SP directly self.stack as usize } } else { 0 - } - } - - fn tls(&self) -> usize { - self.tls as usize - } - - fn child_settid_ptr(&self) -> usize { - self.child_tid as usize - } - - fn child_cleartid_ptr(&self) -> usize { - self.child_tid as usize // for glibc compatibility - } - - fn parent_tid_ptr(&self) -> usize { - self.parent_tid as usize - } - - fn pidfd_ptr(&self) -> usize { - // For clone3(), pidfd is a separate field - self.pidfd as usize - } - - fn validate(&self) -> AxResult<()> { - // Warn about unsupported features - if self.set_tid != 0 || self.set_tid_size != 0 { - warn!("sys_clone3: set_tid/set_tid_size not supported, ignoring"); - } - if self.cgroup != 0 { - warn!("sys_clone3: cgroup parameter not supported, ignoring"); - } - - // In clone3(), PIDFD and PARENT_SETTID can coexist - // because they use separate fields (no validation needed) - - Ok(()) + }; + + Ok(CloneArgs { + flags, + exit_signal: self.exit_signal, + stack, + tls: self.tls as usize, + parent_tid: self.parent_tid as usize, + child_tid: self.child_tid as usize, + pidfd: self.pidfd as usize, + }) } } pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxResult { debug!("sys_clone3 <= args_ptr: {args_ptr:#x}, args_size: {args_size}"); - // Validate size if args_size < MIN_CLONE_ARGS_SIZE { warn!("sys_clone3: args_size {args_size} too small, minimum is {MIN_CLONE_ARGS_SIZE}"); return Err(AxError::InvalidInput); @@ -97,14 +68,11 @@ pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxRe debug!("sys_clone3: args_size {args_size} larger than expected, using known fields only"); } - // Copy arguments from user space let args_ptr = args_ptr as *const Clone3Args; - let args = unsafe { args_ptr.vm_read_uninit()?.assume_init() }; - debug!("sys_clone3: args = {args:?}"); + let clone3_args = unsafe { args_ptr.vm_read_uninit()?.assume_init() }; - // Use common implementation - let result = do_clone(uctx, &args)?; - debug!("sys_clone3 => child tid: {result}"); + debug!("sys_clone3: args = {clone3_args:?}"); - Ok(result) + let args = clone3_args.into_clone_args()?; + do_clone(uctx, args) } From 7b6d21776704cf613f50cd98bd7ef125edf18523 Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Sat, 31 Jan 2026 23:48:51 +0800 Subject: [PATCH 5/9] fix PR suggestions --- api/src/syscall/task/clone.rs | 358 ++++++++++++++++----------------- api/src/syscall/task/clone3.rs | 46 +++-- 2 files changed, 201 insertions(+), 203 deletions(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index ee34ff7f..7c81c569 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -97,218 +97,191 @@ pub struct CloneArgs { } impl CloneArgs { - /// Create CloneArgs from clone() syscall parameters. - /// - /// Note: In clone(), the parent_tid parameter serves dual purpose: - /// - If CLONE_PIDFD: receives the pidfd - /// - If CLONE_PARENT_SETTID: receives the child TID - /// These two flags are mutually exclusive. - pub fn from_clone( - raw_flags: u32, - stack: usize, - parent_tid: usize, - child_tid: usize, - tls: usize, - ) -> AxResult { - const FLAG_MASK: u32 = 0xff; - let flags = CloneFlags::from_bits_truncate((raw_flags & !FLAG_MASK) as u64); - let exit_signal = (raw_flags & FLAG_MASK) as u64; - - if flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { + /// Validate common clone flags and parameters. + fn validate(&self) -> AxResult<()> { + let Self { flags, exit_signal, .. } = self; + + if *exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { + return Err(AxError::InvalidInput); + } + if *exit_signal >= 64 { return Err(AxError::InvalidInput); } - Ok(Self { - flags, + let namespace_flags = CloneFlags::NEWNS + | CloneFlags::NEWIPC + | CloneFlags::NEWNET + | CloneFlags::NEWPID + | CloneFlags::NEWUSER + | CloneFlags::NEWUTS + | CloneFlags::NEWCGROUP; + + if flags.intersects(namespace_flags) { + warn!( + "sys_clone/sys_clone3: namespace flags detected, stub support only", + ); + } + + Ok(()) + } + + /// Core implementation of clone/clone3/fork/vfork. + pub fn do_clone(self, uctx: &UserContext) -> AxResult { + self.validate()?; + + let Self { + mut flags, exit_signal, stack, tls, parent_tid, child_tid, - pidfd: 0, - }) - } -} - -fn validate_common(args: &CloneArgs) -> AxResult<()> { - let flags = args.flags; - let exit_signal = args.exit_signal; + pidfd, + } = self; - if exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { - return Err(AxError::InvalidInput); - } - if exit_signal >= 64 { - return Err(AxError::InvalidInput); - } + if flags.contains(CloneFlags::VFORK) { + debug!("do_clone: CLONE_VFORK slow path"); + flags.remove(CloneFlags::VM); + } - let namespace_flags = CloneFlags::NEWNS - | CloneFlags::NEWIPC - | CloneFlags::NEWNET - | CloneFlags::NEWPID - | CloneFlags::NEWUSER - | CloneFlags::NEWUTS - | CloneFlags::NEWCGROUP; - - if flags.intersects(namespace_flags) { - warn!( - "sys_clone/sys_clone3: namespace flags detected ({:?}), stub support only", - flags & namespace_flags + debug!( + "do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}", + flags, exit_signal, stack, tls ); - } - - Ok(()) -} - -/// Core implementation of clone/clone3/fork/vfork. -pub fn do_clone(uctx: &UserContext, args: CloneArgs) -> AxResult { - validate_common(&args)?; - - let mut flags = args.flags; - let exit_signal = args.exit_signal; - - if flags.contains(CloneFlags::VFORK) { - debug!("do_clone: CLONE_VFORK slow path"); - flags.remove(CloneFlags::VM); - } - - debug!( - "do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}", - flags, exit_signal, args.stack, args.tls - ); - - let exit_signal = if exit_signal > 0 { - Signo::from_repr(exit_signal as u8) - } else { - None - }; - let mut new_uctx = *uctx; - if args.stack != 0 { - new_uctx.set_sp(args.stack); - } - if flags.contains(CloneFlags::SETTLS) { - new_uctx.set_tls(args.tls); - } - new_uctx.set_retval(0); + let exit_signal = if exit_signal > 0 { + Signo::from_repr(exit_signal as u8) + } else { + None + }; - let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) { - args.child_tid - } else { - 0 - }; + let mut new_uctx = *uctx; + if stack != 0 { + new_uctx.set_sp(stack); + } + if flags.contains(CloneFlags::SETTLS) { + new_uctx.set_tls(tls); + } + new_uctx.set_retval(0); - let curr = current(); - let old_proc_data = &curr.as_thread().proc_data; + let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) { + child_tid + } else { + 0 + }; - let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); + let curr = current(); + let old_proc_data = &curr.as_thread().proc_data; - let tid = new_task.id().as_u64() as Pid; - if flags.contains(CloneFlags::PARENT_SETTID) && args.parent_tid != 0 { - (args.parent_tid as *mut Pid).vm_write(tid)?; - } + let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); - let new_proc_data = if flags.contains(CloneFlags::THREAD) { - new_task - .ctx_mut() - .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); - old_proc_data.clone() - } else { - let proc = if flags.contains(CloneFlags::PARENT) { - old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? - } else { - old_proc_data.proc.clone() + let tid = new_task.id().as_u64() as Pid; + if flags.contains(CloneFlags::PARENT_SETTID) && parent_tid != 0 { + (parent_tid as *mut Pid).vm_write(tid).ok(); } - .fork(tid); - let aspace = if flags.contains(CloneFlags::VM) { - old_proc_data.aspace.clone() - } else { - let mut aspace = old_proc_data.aspace.lock(); - let aspace = aspace.try_clone()?; - copy_from_kernel(&mut aspace.lock())?; - aspace - }; - new_task - .ctx_mut() - .set_page_table_root(aspace.lock().page_table_root()); - - let signal_actions = if flags.contains(CloneFlags::SIGHAND) { - old_proc_data.signal.actions.clone() - } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { - Arc::new(SpinNoIrq::new(Default::default())) + let new_proc_data = if flags.contains(CloneFlags::THREAD) { + new_task + .ctx_mut() + .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); + old_proc_data.clone() } else { - Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) - }; - - let proc_data = ProcessData::new( - proc, - old_proc_data.exe_path.read().clone(), - old_proc_data.cmdline.read().clone(), - aspace, - signal_actions, - exit_signal, - ); - proc_data.set_umask(old_proc_data.umask()); - proc_data.set_heap_top(old_proc_data.get_heap_top()); - - { - let mut scope = proc_data.scope.write(); - if flags.contains(CloneFlags::FILES) { - FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); + let proc = if flags.contains(CloneFlags::PARENT) { + old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? } else { - FD_TABLE - .scope_mut(&mut scope) - .write() - .clone_from(&FD_TABLE.read()); + old_proc_data.proc.clone() } + .fork(tid); - if flags.contains(CloneFlags::FS) { - FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); + let aspace = if flags.contains(CloneFlags::VM) { + old_proc_data.aspace.clone() + } else { + let mut aspace = old_proc_data.aspace.lock(); + let aspace = aspace.try_clone()?; + copy_from_kernel(&mut aspace.lock())?; + aspace + }; + new_task + .ctx_mut() + .set_page_table_root(aspace.lock().page_table_root()); + + let signal_actions = if flags.contains(CloneFlags::SIGHAND) { + old_proc_data.signal.actions.clone() + } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { + Arc::new(SpinNoIrq::new(Default::default())) } else { - FS_CONTEXT - .scope_mut(&mut scope) - .lock() - .clone_from(&FS_CONTEXT.lock()); + Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) + }; + + let proc_data = ProcessData::new( + proc, + old_proc_data.exe_path.read().clone(), + old_proc_data.cmdline.read().clone(), + aspace, + signal_actions, + exit_signal, + ); + proc_data.set_umask(old_proc_data.umask()); + proc_data.set_heap_top(old_proc_data.get_heap_top()); + + { + let mut scope = proc_data.scope.write(); + if flags.contains(CloneFlags::FILES) { + FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); + } else { + FD_TABLE + .scope_mut(&mut scope) + .write() + .clone_from(&FD_TABLE.read()); + } + + if flags.contains(CloneFlags::FS) { + FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); + } else { + FS_CONTEXT + .scope_mut(&mut scope) + .lock() + .clone_from(&FS_CONTEXT.lock()); + } } - } - - proc_data - }; - new_proc_data.proc.add_thread(tid); - - if flags.contains(CloneFlags::PIDFD) { - let pidfd = PidFd::new(&new_proc_data); - let fd = pidfd.add_to_fd_table(true)?; - let target = if args.pidfd != 0 { - args.pidfd - } else { - args.parent_tid + proc_data }; - if target != 0 { - (target as *mut i32).vm_write(fd)?; + + new_proc_data.proc.add_thread(tid); + + if flags.contains(CloneFlags::PIDFD) { + let pidfd_obj = PidFd::new(&new_proc_data); + let fd = pidfd_obj.add_to_fd_table(true)?; + + // In clone3, pidfd field is used; in sys_clone, parent_tid is reused + if pidfd != 0 { + (pidfd as *mut i32).vm_write(fd)?; + } else if parent_tid != 0 { + (parent_tid as *mut i32).vm_write(fd)?; + } } - } - let thr = Thread::new(tid, new_proc_data); - if flags.contains(CloneFlags::CHILD_CLEARTID) && args.child_tid != 0 { - thr.set_clear_child_tid(args.child_tid); - } - *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); + let thr = Thread::new(tid, new_proc_data); + if flags.contains(CloneFlags::CHILD_CLEARTID) && child_tid != 0 { + thr.set_clear_child_tid(child_tid); + } + *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); - let task = spawn_task(new_task); - add_task_to_table(&task); + let task = spawn_task(new_task); + add_task_to_table(&task); - Ok(tid as _) + Ok(tid as _) + } } pub fn sys_clone( @@ -320,11 +293,28 @@ pub fn sys_clone( tls: usize, #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, ) -> AxResult { - let args = CloneArgs::from_clone(flags, stack, parent_tid, child_tid, tls)?; - do_clone(uctx, args) + const FLAG_MASK: u32 = 0xff; + let clone_flags = CloneFlags::from_bits_truncate((flags & !FLAG_MASK) as u64); + let exit_signal = (flags & FLAG_MASK) as u64; + + if clone_flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { + return Err(AxError::InvalidInput); + } + + let args = CloneArgs { + flags: clone_flags, + exit_signal, + stack, + tls, + parent_tid, + child_tid, + pidfd: 0, + }; + + args.do_clone(uctx) } #[cfg(target_arch = "x86_64")] pub fn sys_fork(uctx: &UserContext) -> AxResult { sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) -} +} \ No newline at end of file diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs index 755d8a70..a0d9a2ba 100644 --- a/api/src/syscall/task/clone3.rs +++ b/api/src/syscall/task/clone3.rs @@ -2,7 +2,7 @@ use axerrno::{AxError, AxResult}; use axhal::uspace::UserContext; use starry_vm::VmPtr; -use super::clone::{CloneArgs, CloneFlags, do_clone}; +use super::clone::{CloneArgs, CloneFlags}; /// Structure passed to clone3() system call. #[repr(C)] @@ -21,24 +21,32 @@ pub struct Clone3Args { pub cgroup: u64, } +// SAFETY: Clone3Args is a POD type with all fields being u64, which are Zeroable +unsafe impl bytemuck::Zeroable for Clone3Args {} + +// SAFETY: Clone3Args is a POD type with no invalid bit patterns +unsafe impl bytemuck::AnyBitPattern for Clone3Args {} + const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; -impl Clone3Args { - fn into_clone_args(self) -> AxResult { - if self.set_tid != 0 || self.set_tid_size != 0 { +impl TryFrom for CloneArgs { + type Error = axerrno::AxError; + + fn try_from(args: Clone3Args) -> AxResult { + if args.set_tid != 0 || args.set_tid_size != 0 { warn!("sys_clone3: set_tid/set_tid_size not supported, ignoring"); } - if self.cgroup != 0 { + if args.cgroup != 0 { warn!("sys_clone3: cgroup parameter not supported, ignoring"); } - let flags = CloneFlags::from_bits_truncate(self.flags); + let flags = CloneFlags::from_bits_truncate(args.flags); - let stack = if self.stack > 0 { - if self.stack_size > 0 { - (self.stack + self.stack_size) as usize + let stack = if args.stack > 0 { + if args.stack_size > 0 { + (args.stack + args.stack_size) as usize } else { - self.stack as usize + args.stack as usize } } else { 0 @@ -46,12 +54,12 @@ impl Clone3Args { Ok(CloneArgs { flags, - exit_signal: self.exit_signal, + exit_signal: args.exit_signal, stack, - tls: self.tls as usize, - parent_tid: self.parent_tid as usize, - child_tid: self.child_tid as usize, - pidfd: self.pidfd as usize, + tls: args.tls as usize, + parent_tid: args.parent_tid as usize, + child_tid: args.child_tid as usize, + pidfd: args.pidfd as usize, }) } } @@ -69,10 +77,10 @@ pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxRe } let args_ptr = args_ptr as *const Clone3Args; - let clone3_args = unsafe { args_ptr.vm_read_uninit()?.assume_init() }; + let clone3_args = args_ptr.vm_read()?; debug!("sys_clone3: args = {clone3_args:?}"); - let args = clone3_args.into_clone_args()?; - do_clone(uctx, args) -} + let args = CloneArgs::try_from(clone3_args)?; + args.do_clone(uctx) +} \ No newline at end of file From 20dee3657cb5694d660a03637f432a10068c6817 Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Fri, 6 Feb 2026 17:59:04 +0800 Subject: [PATCH 6/9] Update clone.rs --- api/src/syscall/task/clone.rs | 319 +++++++++++++++++++++++++++++++++- 1 file changed, 318 insertions(+), 1 deletion(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index 7c81c569..136fd03d 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -11,10 +11,327 @@ use starry_core::{ mm::copy_from_kernel, task::{AsThread, ProcessData, Thread, add_task_to_table}, }; +use starry_process::Pid;use alloc::sync::Arc; + +use axerrno::{AxError, AxResult}; +use axfs::FS_CONTEXT; +use axhal::uspace::UserContext; +use axtask::{current, spawn_task, AxTaskExt}; +use bitflags::bitflags; +use kspin::SpinNoIrq; +use linux_raw_sys::general::*; +use starry_core::{ + mm::copy_from_kernel, + task::{add_task_to_table, AsThread, ProcessData, Thread}, +}; use starry_process::Pid; use starry_signal::Signo; use starry_vm::VmMutPtr; +use crate::{ + file::{FileLike, PidFd, FD_TABLE}, + task::new_user_task, +}; + +bitflags! { + /// Options for use with [`sys_clone`] and [`sys_clone3`]. + #[derive(Debug, Clone, Copy, Default)] + pub struct CloneFlags: u64 { + /// The calling process and the child process run in the same memory space. + const VM = CLONE_VM as u64; + /// The caller and the child process share the same filesystem information. + const FS = CLONE_FS as u64; + /// The calling process and the child process share the same file descriptor table. + const FILES = CLONE_FILES as u64; + /// The calling process and the child process share the same table of signal handlers. + const SIGHAND = CLONE_SIGHAND as u64; + /// Sets pidfd to the child process's PID file descriptor. + const PIDFD = CLONE_PIDFD as u64; + /// If the calling process is being traced, then trace the child also. + const PTRACE = CLONE_PTRACE as u64; + /// The execution of the calling process is suspended until the child releases + /// its virtual memory resources via a call to execve(2) or _exit(2) (as with vfork(2)). + const VFORK = CLONE_VFORK as u64; + /// The parent of the new child (as returned by getppid(2)) will be the same + /// as that of the calling process. + const PARENT = CLONE_PARENT as u64; + /// The child is placed in the same thread group as the calling process. + const THREAD = CLONE_THREAD as u64; + /// The cloned child is started in a new mount namespace. + const NEWNS = CLONE_NEWNS as u64; + /// The child and the calling process share a single list of System V + /// semaphore adjustment values. + const SYSVSEM = CLONE_SYSVSEM as u64; + /// The TLS (Thread Local Storage) descriptor is set to tls. + const SETTLS = CLONE_SETTLS as u64; + /// Store the child thread ID in the parent's memory. + const PARENT_SETTID = CLONE_PARENT_SETTID as u64; + /// Clear (zero) the child thread ID in child memory when the child exits, + /// and do a wakeup on the futex at that address. + const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; + /// A tracing process cannot force `CLONE_PTRACE` on this child process. + const UNTRACED = CLONE_UNTRACED as u64; + /// Store the child thread ID in the child's memory. + const CHILD_SETTID = CLONE_CHILD_SETTID as u64; + /// Create the process in a new cgroup namespace. + const NEWCGROUP = CLONE_NEWCGROUP as u64; + /// Create the process in a new UTS namespace. + const NEWUTS = CLONE_NEWUTS as u64; + /// Create the process in a new IPC namespace. + const NEWIPC = CLONE_NEWIPC as u64; + /// Create the process in a new user namespace. + const NEWUSER = CLONE_NEWUSER as u64; + /// Create the process in a new PID namespace. + const NEWPID = CLONE_NEWPID as u64; + /// Create the process in a new network namespace. + const NEWNET = CLONE_NEWNET as u64; + /// The new process shares an I/O context with the calling process. + const IO = CLONE_IO as u64; + /// Clear signal handlers on clone (since Linux 5.5). + const CLEAR_SIGHAND = 0x100000000u64; + /// Clone into specific cgroup (since Linux 5.7). + const INTO_CGROUP = 0x200000000u64; + } +} + +/// Unified arguments for clone/clone3/fork/vfork. +#[derive(Debug, Clone, Copy, Default)] +pub struct CloneArgs { + pub flags: CloneFlags, + pub exit_signal: u64, + pub stack: usize, + pub tls: usize, + pub parent_tid: usize, + pub child_tid: usize, + pub pidfd: usize, +} + +impl CloneArgs { + fn validate(&self) -> AxResult<()> { + let Self { + flags, + exit_signal, + .. + } = self; + + if *exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::THREAD) + && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) + { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { + return Err(AxError::InvalidInput); + } + if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { + return Err(AxError::InvalidInput); + } + if *exit_signal >= 64 { + return Err(AxError::InvalidInput); + } + + let namespace_flags = CloneFlags::NEWNS + | CloneFlags::NEWIPC + | CloneFlags::NEWNET + | CloneFlags::NEWPID + | CloneFlags::NEWUSER + | CloneFlags::NEWUTS + | CloneFlags::NEWCGROUP; + + if flags.intersects(namespace_flags) { + warn!("sys_clone/sys_clone3: namespace flags detected, stub support only"); + } + + Ok(()) + } + + pub fn do_clone(self, uctx: &UserContext) -> AxResult { + self.validate()?; + + let Self { + mut flags, + exit_signal, + stack, + tls, + parent_tid, + child_tid, + pidfd, + } = self; + + if flags.contains(CloneFlags::VFORK) { + debug!("do_clone: CLONE_VFORK slow path"); + flags.remove(CloneFlags::VM); + } + + debug!( + "do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}", + flags, exit_signal, stack, tls + ); + + let exit_signal = if exit_signal > 0 { + Signo::from_repr(exit_signal as u8) + } else { + None + }; + + let mut new_uctx = *uctx; + if stack != 0 { + new_uctx.set_sp(stack); + } + if flags.contains(CloneFlags::SETTLS) { + new_uctx.set_tls(tls); + } + new_uctx.set_retval(0); + + let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) { + child_tid + } else { + 0 + }; + + let curr = current(); + let old_proc_data = &curr.as_thread().proc_data; + + let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); + + let tid = new_task.id().as_u64() as Pid; + if flags.contains(CloneFlags::PARENT_SETTID) && parent_tid != 0 { + (parent_tid as *mut Pid).vm_write(tid).ok(); + } + + let new_proc_data = if flags.contains(CloneFlags::THREAD) { + new_task + .ctx_mut() + .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); + old_proc_data.clone() + } else { + let proc = if flags.contains(CloneFlags::PARENT) { + old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? + } else { + old_proc_data.proc.clone() + } + .fork(tid); + + let aspace = if flags.contains(CloneFlags::VM) { + old_proc_data.aspace.clone() + } else { + let mut aspace = old_proc_data.aspace.lock(); + let aspace = aspace.try_clone()?; + copy_from_kernel(&mut aspace.lock())?; + aspace + }; + new_task + .ctx_mut() + .set_page_table_root(aspace.lock().page_table_root()); + + let signal_actions = if flags.contains(CloneFlags::SIGHAND) { + old_proc_data.signal.actions.clone() + } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { + Arc::new(SpinNoIrq::new(Default::default())) + } else { + Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) + }; + + let proc_data = ProcessData::new( + proc, + old_proc_data.exe_path.read().clone(), + old_proc_data.cmdline.read().clone(), + aspace, + signal_actions, + exit_signal, + ); + proc_data.set_umask(old_proc_data.umask()); + proc_data.set_heap_top(old_proc_data.get_heap_top()); + + { + let mut scope = proc_data.scope.write(); + if flags.contains(CloneFlags::FILES) { + FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); + } else { + FD_TABLE + .scope_mut(&mut scope) + .write() + .clone_from(&FD_TABLE.read()); + } + + if flags.contains(CloneFlags::FS) { + FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); + } else { + FS_CONTEXT + .scope_mut(&mut scope) + .lock() + .clone_from(&FS_CONTEXT.lock()); + } + } + + proc_data + }; + + new_proc_data.proc.add_thread(tid); + + if flags.contains(CloneFlags::PIDFD) && pidfd != 0 { + let pidfd_obj = PidFd::new(&new_proc_data); + let fd = pidfd_obj.add_to_fd_table(true)?; + (pidfd as *mut i32).vm_write(fd)?; + } + + let thr = Thread::new(tid, new_proc_data); + if flags.contains(CloneFlags::CHILD_CLEARTID) && child_tid != 0 { + thr.set_clear_child_tid(child_tid); + } + *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); + + let task = spawn_task(new_task); + add_task_to_table(&task); + + Ok(tid as _) + } +} + +pub fn sys_clone( + uctx: &UserContext, + flags: u32, + stack: usize, + parent_tid: usize, + #[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize, + tls: usize, + #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, +) -> AxResult { + const FLAG_MASK: u32 = 0xff; + let clone_flags = CloneFlags::from_bits_truncate((flags & !FLAG_MASK) as u64); + let exit_signal = (flags & FLAG_MASK) as u64; + + if clone_flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { + return Err(AxError::InvalidInput); + } + + let args = CloneArgs { + flags: clone_flags, + exit_signal, + stack, + tls, + parent_tid, + child_tid, + // In sys_clone, parent_tid is reused for pidfd when CLONE_PIDFD is set + pidfd: if clone_flags.contains(CloneFlags::PIDFD) { + parent_tid + } else { + 0 + }, + }; + + args.do_clone(uctx) +} + +#[cfg(target_arch = "x86_64")] +pub fn sys_fork(uctx: &UserContext) -> AxResult { + sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) +} +use starry_signal::Signo; +use starry_vm::VmMutPtr; + use crate::{ file::{FD_TABLE, FileLike, PidFd}, task::new_user_task, @@ -317,4 +634,4 @@ pub fn sys_clone( #[cfg(target_arch = "x86_64")] pub fn sys_fork(uctx: &UserContext) -> AxResult { sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) -} \ No newline at end of file +} From c89759d0604f3a81d48f7cb7f3a31af10c20f2de Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Fri, 6 Feb 2026 17:59:32 +0800 Subject: [PATCH 7/9] Update clone3.rs --- api/src/syscall/task/clone3.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs index a0d9a2ba..1bdad8a8 100644 --- a/api/src/syscall/task/clone3.rs +++ b/api/src/syscall/task/clone3.rs @@ -6,7 +6,7 @@ use super::clone::{CloneArgs, CloneFlags}; /// Structure passed to clone3() system call. #[repr(C)] -#[derive(Debug, Clone, Copy, Default)] +#[derive(Debug, Clone, Copy, Default, bytemuck::Zeroable, bytemuck::AnyBitPattern)] pub struct Clone3Args { pub flags: u64, pub pidfd: u64, @@ -21,12 +21,6 @@ pub struct Clone3Args { pub cgroup: u64, } -// SAFETY: Clone3Args is a POD type with all fields being u64, which are Zeroable -unsafe impl bytemuck::Zeroable for Clone3Args {} - -// SAFETY: Clone3Args is a POD type with no invalid bit patterns -unsafe impl bytemuck::AnyBitPattern for Clone3Args {} - const MIN_CLONE_ARGS_SIZE: usize = core::mem::size_of::() * 8; impl TryFrom for CloneArgs { @@ -76,11 +70,10 @@ pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxRe debug!("sys_clone3: args_size {args_size} larger than expected, using known fields only"); } - let args_ptr = args_ptr as *const Clone3Args; - let clone3_args = args_ptr.vm_read()?; + let clone3_args = (args_ptr as *const Clone3Args).vm_read()?; debug!("sys_clone3: args = {clone3_args:?}"); let args = CloneArgs::try_from(clone3_args)?; args.do_clone(uctx) -} \ No newline at end of file +} From d75502e9a1bf2c940c19c51b68fa70c1913333db Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Fri, 6 Feb 2026 18:31:03 +0800 Subject: [PATCH 8/9] Update clone3.rs --- api/src/syscall/task/clone3.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/api/src/syscall/task/clone3.rs b/api/src/syscall/task/clone3.rs index 1bdad8a8..29248c58 100644 --- a/api/src/syscall/task/clone3.rs +++ b/api/src/syscall/task/clone3.rs @@ -58,22 +58,22 @@ impl TryFrom for CloneArgs { } } -pub fn sys_clone3(uctx: &UserContext, args_ptr: usize, args_size: usize) -> AxResult { - debug!("sys_clone3 <= args_ptr: {args_ptr:#x}, args_size: {args_size}"); +pub fn sys_clone3(uctx: &UserContext, args: *const Clone3Args, size: usize) -> AxResult { + debug!("sys_clone3 <= args: {args:p}, size: {size}"); - if args_size < MIN_CLONE_ARGS_SIZE { - warn!("sys_clone3: args_size {args_size} too small, minimum is {MIN_CLONE_ARGS_SIZE}"); + if size < MIN_CLONE_ARGS_SIZE { + warn!("sys_clone3: size {size} too small, minimum is {MIN_CLONE_ARGS_SIZE}"); return Err(AxError::InvalidInput); } - if args_size > core::mem::size_of::() { - debug!("sys_clone3: args_size {args_size} larger than expected, using known fields only"); + if size > core::mem::size_of::() { + debug!("sys_clone3: size {size} larger than expected, using known fields only"); } - let clone3_args = (args_ptr as *const Clone3Args).vm_read()?; + let clone3_args = args.vm_read()?; debug!("sys_clone3: args = {clone3_args:?}"); - let args = CloneArgs::try_from(clone3_args)?; - args.do_clone(uctx) + let clone_args = CloneArgs::try_from(clone3_args)?; + clone_args.do_clone(uctx) } From 244bf9fad21d1b16f9988945374d5fa12a28055a Mon Sep 17 00:00:00 2001 From: WyZ0125 Date: Fri, 6 Feb 2026 18:31:36 +0800 Subject: [PATCH 9/9] Update clone.rs --- api/src/syscall/task/clone.rs | 319 ---------------------------------- 1 file changed, 319 deletions(-) diff --git a/api/src/syscall/task/clone.rs b/api/src/syscall/task/clone.rs index 136fd03d..c11f7922 100644 --- a/api/src/syscall/task/clone.rs +++ b/api/src/syscall/task/clone.rs @@ -1,18 +1,5 @@ use alloc::sync::Arc; -use axerrno::{AxError, AxResult}; -use axfs::FS_CONTEXT; -use axhal::uspace::UserContext; -use axtask::{AxTaskExt, current, spawn_task}; -use bitflags::bitflags; -use kspin::SpinNoIrq; -use linux_raw_sys::general::*; -use starry_core::{ - mm::copy_from_kernel, - task::{AsThread, ProcessData, Thread, add_task_to_table}, -}; -use starry_process::Pid;use alloc::sync::Arc; - use axerrno::{AxError, AxResult}; use axfs::FS_CONTEXT; use axhal::uspace::UserContext; @@ -329,309 +316,3 @@ pub fn sys_clone( pub fn sys_fork(uctx: &UserContext) -> AxResult { sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) } -use starry_signal::Signo; -use starry_vm::VmMutPtr; - -use crate::{ - file::{FD_TABLE, FileLike, PidFd}, - task::new_user_task, -}; - -bitflags! { - /// Options for use with [`sys_clone`] and [`sys_clone3`]. - #[derive(Debug, Clone, Copy, Default)] - pub struct CloneFlags: u64 { - /// The calling process and the child process run in the same memory space. - const VM = CLONE_VM as u64; - /// The caller and the child process share the same filesystem information. - const FS = CLONE_FS as u64; - /// The calling process and the child process share the same file descriptor table. - const FILES = CLONE_FILES as u64; - /// The calling process and the child process share the same table of signal handlers. - const SIGHAND = CLONE_SIGHAND as u64; - /// Sets pidfd to the child process's PID file descriptor. - const PIDFD = CLONE_PIDFD as u64; - /// If the calling process is being traced, then trace the child also. - const PTRACE = CLONE_PTRACE as u64; - /// The execution of the calling process is suspended until the child releases - /// its virtual memory resources via a call to execve(2) or _exit(2) (as with vfork(2)). - const VFORK = CLONE_VFORK as u64; - /// The parent of the new child (as returned by getppid(2)) will be the same - /// as that of the calling process. - const PARENT = CLONE_PARENT as u64; - /// The child is placed in the same thread group as the calling process. - const THREAD = CLONE_THREAD as u64; - /// The cloned child is started in a new mount namespace. - const NEWNS = CLONE_NEWNS as u64; - /// The child and the calling process share a single list of System V - /// semaphore adjustment values. - const SYSVSEM = CLONE_SYSVSEM as u64; - /// The TLS (Thread Local Storage) descriptor is set to tls. - const SETTLS = CLONE_SETTLS as u64; - /// Store the child thread ID in the parent's memory. - const PARENT_SETTID = CLONE_PARENT_SETTID as u64; - /// Clear (zero) the child thread ID in child memory when the child exits, - /// and do a wakeup on the futex at that address. - const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64; - /// A tracing process cannot force `CLONE_PTRACE` on this child process. - const UNTRACED = CLONE_UNTRACED as u64; - /// Store the child thread ID in the child's memory. - const CHILD_SETTID = CLONE_CHILD_SETTID as u64; - /// Create the process in a new cgroup namespace. - const NEWCGROUP = CLONE_NEWCGROUP as u64; - /// Create the process in a new UTS namespace. - const NEWUTS = CLONE_NEWUTS as u64; - /// Create the process in a new IPC namespace. - const NEWIPC = CLONE_NEWIPC as u64; - /// Create the process in a new user namespace. - const NEWUSER = CLONE_NEWUSER as u64; - /// Create the process in a new PID namespace. - const NEWPID = CLONE_NEWPID as u64; - /// Create the process in a new network namespace. - const NEWNET = CLONE_NEWNET as u64; - /// The new process shares an I/O context with the calling process. - const IO = CLONE_IO as u64; - /// Clear signal handlers on clone (since Linux 5.5). - const CLEAR_SIGHAND = 0x100000000u64; - /// Clone into specific cgroup (since Linux 5.7). - const INTO_CGROUP = 0x200000000u64; - } -} - -/// Unified arguments for clone/clone3/fork/vfork. -/// -/// This structure is used internally to homogenize parameters from different -/// clone syscall variants (clone, clone3, fork, vfork). -#[derive(Debug, Clone, Copy, Default)] -pub struct CloneArgs { - pub flags: CloneFlags, - pub exit_signal: u64, - pub stack: usize, - pub tls: usize, - pub parent_tid: usize, - pub child_tid: usize, - pub pidfd: usize, -} - -impl CloneArgs { - /// Validate common clone flags and parameters. - fn validate(&self) -> AxResult<()> { - let Self { flags, exit_signal, .. } = self; - - if *exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) { - return Err(AxError::InvalidInput); - } - if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) { - return Err(AxError::InvalidInput); - } - if *exit_signal >= 64 { - return Err(AxError::InvalidInput); - } - - let namespace_flags = CloneFlags::NEWNS - | CloneFlags::NEWIPC - | CloneFlags::NEWNET - | CloneFlags::NEWPID - | CloneFlags::NEWUSER - | CloneFlags::NEWUTS - | CloneFlags::NEWCGROUP; - - if flags.intersects(namespace_flags) { - warn!( - "sys_clone/sys_clone3: namespace flags detected, stub support only", - ); - } - - Ok(()) - } - - /// Core implementation of clone/clone3/fork/vfork. - pub fn do_clone(self, uctx: &UserContext) -> AxResult { - self.validate()?; - - let Self { - mut flags, - exit_signal, - stack, - tls, - parent_tid, - child_tid, - pidfd, - } = self; - - if flags.contains(CloneFlags::VFORK) { - debug!("do_clone: CLONE_VFORK slow path"); - flags.remove(CloneFlags::VM); - } - - debug!( - "do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}", - flags, exit_signal, stack, tls - ); - - let exit_signal = if exit_signal > 0 { - Signo::from_repr(exit_signal as u8) - } else { - None - }; - - let mut new_uctx = *uctx; - if stack != 0 { - new_uctx.set_sp(stack); - } - if flags.contains(CloneFlags::SETTLS) { - new_uctx.set_tls(tls); - } - new_uctx.set_retval(0); - - let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) { - child_tid - } else { - 0 - }; - - let curr = current(); - let old_proc_data = &curr.as_thread().proc_data; - - let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid); - - let tid = new_task.id().as_u64() as Pid; - if flags.contains(CloneFlags::PARENT_SETTID) && parent_tid != 0 { - (parent_tid as *mut Pid).vm_write(tid).ok(); - } - - let new_proc_data = if flags.contains(CloneFlags::THREAD) { - new_task - .ctx_mut() - .set_page_table_root(old_proc_data.aspace.lock().page_table_root()); - old_proc_data.clone() - } else { - let proc = if flags.contains(CloneFlags::PARENT) { - old_proc_data.proc.parent().ok_or(AxError::InvalidInput)? - } else { - old_proc_data.proc.clone() - } - .fork(tid); - - let aspace = if flags.contains(CloneFlags::VM) { - old_proc_data.aspace.clone() - } else { - let mut aspace = old_proc_data.aspace.lock(); - let aspace = aspace.try_clone()?; - copy_from_kernel(&mut aspace.lock())?; - aspace - }; - new_task - .ctx_mut() - .set_page_table_root(aspace.lock().page_table_root()); - - let signal_actions = if flags.contains(CloneFlags::SIGHAND) { - old_proc_data.signal.actions.clone() - } else if flags.contains(CloneFlags::CLEAR_SIGHAND) { - Arc::new(SpinNoIrq::new(Default::default())) - } else { - Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone())) - }; - - let proc_data = ProcessData::new( - proc, - old_proc_data.exe_path.read().clone(), - old_proc_data.cmdline.read().clone(), - aspace, - signal_actions, - exit_signal, - ); - proc_data.set_umask(old_proc_data.umask()); - proc_data.set_heap_top(old_proc_data.get_heap_top()); - - { - let mut scope = proc_data.scope.write(); - if flags.contains(CloneFlags::FILES) { - FD_TABLE.scope_mut(&mut scope).clone_from(&FD_TABLE); - } else { - FD_TABLE - .scope_mut(&mut scope) - .write() - .clone_from(&FD_TABLE.read()); - } - - if flags.contains(CloneFlags::FS) { - FS_CONTEXT.scope_mut(&mut scope).clone_from(&FS_CONTEXT); - } else { - FS_CONTEXT - .scope_mut(&mut scope) - .lock() - .clone_from(&FS_CONTEXT.lock()); - } - } - - proc_data - }; - - new_proc_data.proc.add_thread(tid); - - if flags.contains(CloneFlags::PIDFD) { - let pidfd_obj = PidFd::new(&new_proc_data); - let fd = pidfd_obj.add_to_fd_table(true)?; - - // In clone3, pidfd field is used; in sys_clone, parent_tid is reused - if pidfd != 0 { - (pidfd as *mut i32).vm_write(fd)?; - } else if parent_tid != 0 { - (parent_tid as *mut i32).vm_write(fd)?; - } - } - - let thr = Thread::new(tid, new_proc_data); - if flags.contains(CloneFlags::CHILD_CLEARTID) && child_tid != 0 { - thr.set_clear_child_tid(child_tid); - } - *new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) }); - - let task = spawn_task(new_task); - add_task_to_table(&task); - - Ok(tid as _) - } -} - -pub fn sys_clone( - uctx: &UserContext, - flags: u32, - stack: usize, - parent_tid: usize, - #[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize, - tls: usize, - #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize, -) -> AxResult { - const FLAG_MASK: u32 = 0xff; - let clone_flags = CloneFlags::from_bits_truncate((flags & !FLAG_MASK) as u64); - let exit_signal = (flags & FLAG_MASK) as u64; - - if clone_flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) { - return Err(AxError::InvalidInput); - } - - let args = CloneArgs { - flags: clone_flags, - exit_signal, - stack, - tls, - parent_tid, - child_tid, - pidfd: 0, - }; - - args.do_clone(uctx) -} - -#[cfg(target_arch = "x86_64")] -pub fn sys_fork(uctx: &UserContext) -> AxResult { - sys_clone(uctx, SIGCHLD, 0, 0, 0, 0) -}