Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion kernel/src/arch_impl/aarch64/context_switch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,15 @@ fn restore_userspace_context_arm64(thread_id: u64, frame: &mut Aarch64ExceptionF

// Switch TTBR0 if needed for different address space
switch_ttbr0_if_needed(thread_id);

// CRITICAL: Set user_rsp_scratch to this thread's kernel stack top.
// The IRQ return path (boot.S) uses `mov sp, [user_rsp_scratch]` before ERET.
// Without this, SP_EL1 retains the switching-out thread's stack pointer,
// causing the next IRQ from EL0 to allocate its exception frame on the
// wrong kernel stack — corrupting memory and other threads' SVC frames.
unsafe {
Aarch64PerCpu::set_user_rsp_scratch(Aarch64PerCpu::kernel_stack_top());
}
}

/// Set up exception frame for first entry to userspace.
Expand Down Expand Up @@ -644,7 +653,12 @@ fn setup_first_userspace_entry_arm64(thread_id: u64, frame: &mut Aarch64Exceptio
// Switch TTBR0 for this thread's address space
switch_ttbr0_if_needed(thread_id);

// NOTE: No logging - context switch path must be lock-free
// CRITICAL: Set user_rsp_scratch to this thread's kernel stack top.
// Same as restore_userspace_context_arm64 — the IRQ return path uses
// user_rsp_scratch for SP after ERET. Without this, SP_EL1 is wrong.
unsafe {
Aarch64PerCpu::set_user_rsp_scratch(Aarch64PerCpu::kernel_stack_top());
}
}

/// Switch TTBR0_EL1 if the thread requires a different address space.
Expand Down
110 changes: 66 additions & 44 deletions kernel/src/arch_impl/aarch64/exception.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,36 +81,35 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr:

// Not a CoW fault or couldn't be handled
let frame_ref = unsafe { &mut *frame };
crate::serial_println!("[exception] Data abort at address {:#x}", far);
crate::serial_println!(" ELR: {:#x}, ESR: {:#x}", frame_ref.elr, esr);
crate::serial_println!(" ISS: {:#x} (WnR={}, DFSC={:#x})",
iss, (iss >> 6) & 1, iss & 0x3F);
let dfsc = (iss & 0x3F) as u16;

// Check if from userspace (EL0) - SPSR[3:0] indicates source EL
let from_el0 = (frame_ref.spsr & 0xF) == 0;
let ttbr0: u64;
unsafe {
core::arch::asm!("mrs {}, ttbr0_el1", out(reg) ttbr0, options(nomem, nostack));
}

// Lock-free trace: data abort event
crate::tracing::providers::process::trace_data_abort(0, dfsc);

// One condensed serial_println for fatal crash visibility
crate::serial_println!("[DATA_ABORT] FAR={:#x} ESR={:#x} DFSC={:#x}", far, esr, dfsc);

if from_el0 {
// From userspace - terminate the process with SIGSEGV
crate::serial_println!("[exception] Terminating userspace process with SIGSEGV");

// Get current TTBR0 to find the process
let ttbr0: u64;
unsafe {
core::arch::asm!("mrs {}, ttbr0_el1", out(reg) ttbr0, options(nomem, nostack));
}
let page_table_phys = ttbr0 & !0xFFFF_0000_0000_0FFF;

// Find and terminate the process
let mut terminated = false;
crate::process::with_process_manager(|pm| {
if let Some((pid, process)) = pm.find_process_by_cr3_mut(page_table_phys) {
let name = process.name.clone();
crate::serial_println!("[exception] Killing process {} (PID {}) due to data abort",
name, pid.as_u64());
if let Some((pid, _process)) = pm.find_process_by_cr3_mut(page_table_phys) {
crate::tracing::providers::process::trace_process_exit(pid.as_u64() as u16, (-11i16) as u16);
pm.exit_process(pid, -11); // SIGSEGV exit code
terminated = true;
} else {
crate::serial_println!("[exception] Could not find process with TTBR0={:#x}", page_table_phys);
// trace_data_abort already captured the fault
}
});

Expand All @@ -136,10 +135,51 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr:
}

exception_class::INSTRUCTION_ABORT_LOWER | exception_class::INSTRUCTION_ABORT_SAME => {
let frame = unsafe { &*frame };
crate::serial_println!("[exception] Instruction abort at address {:#x}", far);
crate::serial_println!(" ELR: {:#x}, ESR: {:#x}", frame.elr, esr);
// For now, hang
let frame_ref = unsafe { &mut *frame };
let ifsc = (iss & 0x3F) as u16;
let from_el0 = (frame_ref.spsr & 0xF) == 0;

let ttbr0: u64;
unsafe {
core::arch::asm!("mrs {}, ttbr0_el1", out(reg) ttbr0, options(nomem, nostack));
}

crate::serial_println!(
"[INSTRUCTION_ABORT] FAR={:#x} ELR={:#x} ESR={:#x} IFSC={:#x} TTBR0={:#x} from_el0={}",
far, frame_ref.elr, esr, ifsc, ttbr0, from_el0
);

if from_el0 {
// From userspace - terminate the process with SIGSEGV
let page_table_phys = ttbr0 & !0xFFFF_0000_0000_0FFF;

let mut terminated = false;
crate::process::with_process_manager(|pm| {
if let Some((pid, _process)) = pm.find_process_by_cr3_mut(page_table_phys) {
crate::serial_println!(
"[INSTRUCTION_ABORT] Terminating PID {} (SIGSEGV)",
pid.as_u64()
);
crate::tracing::providers::process::trace_process_exit(
pid.as_u64() as u16,
(-11i16) as u16,
);
pm.exit_process(pid, -11); // SIGSEGV
terminated = true;
}
});

if terminated {
crate::task::scheduler::set_need_resched();
crate::task::scheduler::switch_to_idle();
frame_ref.elr =
crate::arch_impl::aarch64::idle_loop_arm64 as *const () as u64;
frame_ref.spsr = 0x3c5; // EL1h, interrupts enabled
return;
}
}

// From kernel or couldn't terminate - hang
loop { unsafe { core::arch::asm!("wfi"); } }
}

Expand Down Expand Up @@ -509,12 +549,8 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
// Mask off ASID to get physical address
let page_table_phys = ttbr0 & !0xFFFF_0000_0000_0FFF;

crate::serial_println!(
"[COW ARM64] fault at {:#x}, ttbr0={:#x}, pt_phys={:#x}",
far,
ttbr0,
page_table_phys
);
// Lock-free trace: CoW fault entry (pid unknown yet, page index from far)
crate::tracing::providers::process::trace_cow_fault(0, (far >> 12) as u16);

// Try to acquire process manager lock
match crate::process::try_manager() {
Expand All @@ -529,7 +565,6 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
let (_pid, process) = match pm.find_process_by_cr3_mut(page_table_phys) {
Some(p) => p,
None => {
crate::serial_println!("[COW] No process found for TTBR0");
return false;
}
};
Expand All @@ -545,23 +580,17 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
let (old_frame, old_flags) = match page_table.get_page_info(page) {
Some(info) => info,
None => {
crate::serial_println!("[COW] No page info for {:#x}", far);
return false;
}
};

// Check if this is a CoW page
if !is_cow_page(old_flags) {
crate::serial_println!("[COW] Not a CoW page");
return false;
}

crate::serial_println!(
"[COW] Handling page {:#x}, frame={:#x}, shared={}",
far,
old_frame.start_address().as_u64(),
frame_is_shared(old_frame)
);
// Lock-free trace: CoW handling with known PID
crate::tracing::providers::process::trace_cow_fault(_pid.as_u64() as u16, (far >> 12) as u16);

// If we're the sole owner, just make it writable
if !frame_is_shared(old_frame) {
Expand All @@ -582,15 +611,14 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
);
}
cow_stats::SOLE_OWNER_OPT.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!("[COW] Made sole-owner page writable");
crate::tracing::providers::process::trace_cow_copy(_pid.as_u64() as u16, (far >> 12) as u16);
return true;
}

// Need to copy the page
let new_frame = match allocate_frame() {
Some(f) => f,
None => {
crate::serial_println!("[COW] Failed to allocate frame");
return false;
}
};
Expand All @@ -607,11 +635,9 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
// Unmap old page and map new one with write permissions
let new_flags = make_private_flags(old_flags);
if page_table.unmap_page(page).is_err() {
crate::serial_println!("[COW] Failed to unmap old page");
return false;
}
if page_table.map_page(page, new_frame, new_flags).is_err() {
crate::serial_println!("[COW] Failed to map new page");
return false;
}

Expand All @@ -632,17 +658,13 @@ fn handle_cow_fault_arm64(far: u64, iss: u32) -> bool {
}

cow_stats::PAGES_COPIED.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!(
"[COW] Copied page from {:#x} to {:#x}",
old_frame.start_address().as_u64(),
new_frame.start_address().as_u64()
);
crate::tracing::providers::process::trace_cow_copy(_pid.as_u64() as u16, (far >> 12) as u16);

true
}
None => {
cow_stats::DIRECT_PATH.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
crate::serial_println!("[COW] Manager lock held, cannot handle");
crate::tracing::providers::process::trace_cow_lock_fail(0);
false
}
}
Expand Down
69 changes: 9 additions & 60 deletions kernel/src/memory/process_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,6 @@ impl ProcessPageTable {
Some(frame) => {
let frame_addr = frame.start_address().as_u64();
log::debug!("Successfully allocated frame: {:#x}", frame_addr);
crate::serial_println!("🔍 ProcessPageTable::new() allocated L4 frame={:#x}", frame_addr);

// Check for problematic frames
if frame_addr == 0x611000 {
Expand All @@ -404,8 +403,6 @@ impl ProcessPageTable {
"Allocated L4 frame: {:#x}",
level_4_frame.start_address().as_u64()
);
crate::serial_println!("🔍 ProcessPageTable will use L4 frame={:#x}", level_4_frame.start_address().as_u64());

// Get physical memory offset
let phys_offset = crate::memory::physical_memory_offset();

Expand Down Expand Up @@ -1019,8 +1016,12 @@ impl ProcessPageTable {
let l4_virt = phys_offset + self.level_4_frame.start_address().as_u64();
let l4_table = &*(l4_virt.as_ptr() as *const PageTable);

// Walk L4 entries 0-255 (userspace only, 256-511 is kernel)
for l4_idx in 0..256u64 {
// Walk userspace L4 entries (x86_64: 0-255; ARM64 TTBR0 uses 0-511)
#[cfg(target_arch = "x86_64")]
let l4_range = 0..256u64;
#[cfg(target_arch = "aarch64")]
let l4_range = 0..512u64;
for l4_idx in l4_range {
let l4_entry = &l4_table[l4_idx as usize];
if l4_entry.is_unused() || !l4_entry.flags().contains(PageTableFlags::PRESENT) {
continue;
Expand Down Expand Up @@ -2122,47 +2123,25 @@ pub fn map_user_stack_to_process(
{
// ARM64: user addresses live in TTBR0 and must be mapped directly into
// the process page table, not copied from the kernel (TTBR1) mappings.
crate::serial_println!(
"map_user_stack_to_process [ARM64]: enter mapping {:#x} - {:#x}",
start_page.start_address().as_u64(),
end_page.start_address().as_u64()
);
let flags = PageTableFlags::PRESENT
| PageTableFlags::WRITABLE
| PageTableFlags::USER_ACCESSIBLE;

for page in Page::range_inclusive(start_page, end_page) {
crate::serial_println!(
"map_user_stack_to_process [ARM64]: processing page {:#x}",
page.start_address().as_u64()
);
if let Some(existing_frame) = process_page_table.translate_page(page.start_address()) {
let existing_frame = PhysFrame::<Size4KiB>::containing_address(existing_frame);
log::trace!(
"User stack page {:#x} already mapped to frame {:#x}",
page.start_address().as_u64(),
existing_frame.start_address().as_u64()
);
crate::serial_println!(
"map_user_stack_to_process [ARM64]: page already mapped to frame {:#x}",
existing_frame.start_address().as_u64()
);
mapped_pages += 1;
continue;
}

let frame = match allocate_frame() {
Some(frame) => {
crate::serial_println!(
"map_user_stack_to_process [ARM64]: allocated frame {:#x}",
frame.start_address().as_u64()
);
frame
}
Some(frame) => frame,
None => {
crate::serial_println!(
"map_user_stack_to_process [ARM64]: allocate_frame failed (OOM)"
);
return Err("Out of memory for user stack");
}
};
Expand All @@ -2174,24 +2153,13 @@ pub fn map_user_stack_to_process(
page.start_address().as_u64(),
frame.start_address().as_u64()
);
crate::serial_println!(
"map_user_stack_to_process [ARM64]: map_page ok {:#x} -> {:#x}",
page.start_address().as_u64(),
frame.start_address().as_u64()
);
}
Err(e) => {
log::error!(
"Failed to map user stack page {:#x}: {}",
page.start_address().as_u64(),
e
);
crate::serial_println!(
"map_user_stack_to_process [ARM64]: map_page FAILED {:#x} -> {:#x}: {}",
page.start_address().as_u64(),
frame.start_address().as_u64(),
e
);
return Err("Failed to map user stack page");
}
}
Expand Down Expand Up @@ -2229,14 +2197,6 @@ pub fn map_user_stack_to_process_with_phys(
let stack_size = user_stack_top.as_u64() - user_stack_bottom.as_u64();
let num_pages = stack_size / 4096;

crate::serial_println!(
"map_user_stack_to_process_with_phys: user {:#x}-{:#x}, phys {:#x}, {} pages",
user_stack_bottom.as_u64(),
user_stack_top.as_u64(),
phys_bottom,
num_pages
);

let flags = PageTableFlags::PRESENT
| PageTableFlags::WRITABLE
| PageTableFlags::USER_ACCESSIBLE;
Expand All @@ -2248,13 +2208,6 @@ pub fn map_user_stack_to_process_with_phys(
let page = Page::<Size4KiB>::containing_address(user_vaddr);
let frame = PhysFrame::<Size4KiB>::containing_address(phys_addr);

crate::serial_println!(
" page {}: user {:#x} -> phys {:#x}",
i,
user_vaddr.as_u64(),
phys_addr.as_u64()
);

match process_page_table.map_page(page, frame, flags) {
Ok(()) => {
log::trace!(
Expand All @@ -2264,8 +2217,8 @@ pub fn map_user_stack_to_process_with_phys(
);
}
Err(e) => {
crate::serial_println!(
" FAILED to map page {:#x} -> {:#x}: {}",
log::error!(
"Failed to map page {:#x} -> {:#x}: {}",
user_vaddr.as_u64(),
phys_addr.as_u64(),
e
Expand All @@ -2275,9 +2228,5 @@ pub fn map_user_stack_to_process_with_phys(
}
}

crate::serial_println!(
"map_user_stack_to_process_with_phys: mapped {} pages successfully",
num_pages
);
Ok(())
}
Loading
Loading