3 changes: 2 additions & 1 deletion kernel/Cargo.toml
@@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(never)"] }
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(never)", "cfg(feature, values(\"particle_animation\"))"] }

[[bin]]
name = "kernel"
@@ -49,6 +49,7 @@ log = { version = "0.4.17", default-features = false }
spin = "0.9.8"
crossbeam-queue = { version = "0.3", default-features = false, features = ["alloc"] }
futures-util = { version = "0.3.17", default-features = false, features = ["alloc"] }
linked_list_allocator = "0.10"
noto-sans-mono-bitmap = { version = "0.3", default-features = false, features = ["size_16", "regular", "unicode-basic-latin", "unicode-specials"] }

[target.'cfg(target_arch = "x86_64")'.dependencies]
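Note on the Cargo.toml change: the added check-cfg value registers `feature = "particle_animation"` with the `unexpected_cfgs` lint, so code gated on that cfg compiles cleanly even though the feature is not declared under [features]. A minimal sketch of the kind of gate this permits (the function itself is hypothetical):

```rust
// Hypothetical example: with the check-cfg entry above, this gate no
// longer trips the `unexpected_cfgs` lint at level "warn".
#[cfg(feature = "particle_animation")]
fn draw_particle_frame() {
    // feature-gated rendering code would live here
}
```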
46 changes: 38 additions & 8 deletions kernel/src/arch_impl/aarch64/boot.S
@@ -212,7 +212,7 @@ curr_el_spx_serror:
// Lower EL using AArch64 (user mode)
.balign 0x80
lower_el_aarch64_sync:
b sync_exception_handler
b lower_el_sync_dispatch
.balign 0x80
lower_el_aarch64_irq:
b irq_handler
@@ -347,6 +347,25 @@ zero_table_loop:

.section .text

/*
* Dispatch handler for synchronous exceptions from EL0 (userspace).
*
* SVCs (syscalls) are routed to syscall_entry_from_el0 in syscall_entry.S,
* which has proper interrupt masking, reschedule checks, TTBR0 handling,
* and PREEMPT_ACTIVE management.
*
* All other sync exceptions (page faults, etc.) go to the generic
* sync_exception_handler which passes ESR/FAR to the Rust handler.
*
* Uses x16/x17 as scratch (intra-procedure call scratch registers per ABI).
*/
lower_el_sync_dispatch:
mrs x16, esr_el1
lsr x17, x16, #26 // Extract EC field (bits [31:26])
cmp x17, #0x15 // EC 0x15 = SVC instruction from AArch64
b.eq syscall_entry_from_el0
b sync_exception_handler

sync_exception_handler:
// Save all registers
sub sp, sp, #272 // 33 registers × 8 bytes + 8 padding
@@ -452,6 +471,7 @@ irq_handler:

// Restore all general-purpose registers from the exception frame
// x0 is particularly critical - it contains the fork() return value!
// NOTE: x16 is restored LATER via per-CPU scratch (see below)
ldp x0, x1, [sp, #0]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
@@ -460,7 +480,8 @@ irq_handler:
ldp x10, x11, [sp, #80]
ldp x12, x13, [sp, #96]
ldp x14, x15, [sp, #112]
ldp x16, x17, [sp, #128]
// Skip x16 here - will be restored via per-CPU scratch after SP switch
ldr x17, [sp, #136] // Restore x17 only
ldp x18, x19, [sp, #144]
ldp x20, x21, [sp, #160]
ldp x22, x23, [sp, #176]
@@ -469,13 +490,22 @@ irq_handler:
ldp x28, x29, [sp, #224]
ldr x30, [sp, #240]

// Load new SP from user_rsp_scratch
// For userspace returns: this is sp+272 (just popping the exception frame)
// For kernel thread switches: this may be a different thread's saved SP
// Use x16 as scratch - it's an intra-procedure scratch register in ARM64 ABI
// Save frame.x16 to per-CPU ERET scratch (offset 96), using x16/x17 as scratch
mrs x16, tpidr_el1 // x16 = percpu base
ldr x17, [sp, #128] // x17 = frame.x16 (temp)
str x17, [x16, #96] // percpu.eret_scratch = frame.x16

// Re-restore x17 from frame (was used as temp above)
ldr x17, [sp, #136] // x17 = frame.x17 (final value)

// Set SP from user_rsp_scratch (offset 40)
ldr x16, [x16, #40] // x16 = user_rsp_scratch
mov sp, x16 // SP = correct stack top

// Restore x16 from per-CPU ERET scratch
mrs x16, tpidr_el1
ldr x16, [x16, #40]
mov sp, x16
ldr x16, [x16, #96] // x16 = saved frame.x16

eret

unhandled_exception:
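For reference, here is the EC decode that lower_el_sync_dispatch performs, restated as a Rust sketch. The shift and comparison values come straight from the assembly above; the helper name is invented for illustration.

```rust
// ESR_EL1 bits [31:26] hold the Exception Class (EC). EC 0x15 means
// "SVC instruction execution in AArch64 state", i.e. a syscall from EL0;
// everything else (page faults, ...) stays on the generic handler path.
const ESR_EC_SHIFT: u64 = 26;
const ESR_EC_MASK: u64 = 0x3F;
const EC_SVC_AARCH64: u64 = 0x15;

fn is_svc_from_aarch64(esr_el1: u64) -> bool {
    (esr_el1 >> ESR_EC_SHIFT) & ESR_EC_MASK == EC_SVC_AARCH64
}
```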
4 changes: 4 additions & 0 deletions kernel/src/arch_impl/aarch64/constants.rs
@@ -133,6 +133,10 @@ pub const PERCPU_SAVED_PROCESS_CR3_OFFSET: usize = 80;
/// Offset of exception_cleanup_context flag in PerCpuData.
pub const PERCPU_EXCEPTION_CLEANUP_CONTEXT_OFFSET: usize = 88;

/// Offset of scratch register save area in PerCpuData.
/// Used by assembly ERET paths to save/restore one register across SP switches.
pub const PERCPU_ERET_SCRATCH_OFFSET: usize = 96;

// ============================================================================
// Preempt Count Bit Layout (Linux-compatible)
// ============================================================================
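Taken together with the tpidr_el1-relative loads in the assembly, the offsets used in this PR imply the following per-CPU layout. This is a reconstruction for reference, not code from the PR: only the named fields are confirmed by the constants and the diff, and the `_pad` fillers stand in for fields the diff never touches.

```rust
// Reconstructed sketch of the PerCpuData fields this PR relies on:
// 16 = kernel_stack_top, 32 = preempt_count (PREEMPT_ACTIVE is bit 28),
// 40 = user_rsp_scratch, 80 = saved_process_cr3,
// 88 = exception_cleanup_context, 96 = eret_scratch (new in this PR).
#[allow(dead_code)]
#[repr(C)]
struct PerCpuDataSketch {
    _pad0: [u8; 16],                // 0..16: fields not referenced in this PR
    kernel_stack_top: u64,          // offset 16
    _pad1: [u8; 8],                 // 24..32
    preempt_count: u32,             // offset 32
    _pad2: [u8; 4],                 // 36..40
    user_rsp_scratch: u64,          // offset 40
    _pad3: [u8; 32],                // 48..80
    saved_process_cr3: u64,         // offset 80
    exception_cleanup_context: u64, // offset 88
    eret_scratch: u64,              // offset 96
}

// Compile-time check that the sketch agrees with PERCPU_ERET_SCRATCH_OFFSET.
const _: () = assert!(core::mem::offset_of!(PerCpuDataSketch, eret_scratch) == 96);
```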
18 changes: 15 additions & 3 deletions kernel/src/arch_impl/aarch64/context_switch.rs
@@ -23,10 +23,12 @@ use crate::task::thread::{CpuContext, ThreadPrivilege, ThreadState};
#[inline(always)]
#[allow(dead_code)]
fn raw_uart_char(c: u8) {
// QEMU virt machine UART base address
const UART_BASE: u64 = 0x0900_0000;
// QEMU virt machine UART via HHDM (TTBR1-mapped, safe during context switch)
// Physical 0x0900_0000 is mapped at HHDM_BASE + 0x0900_0000
const HHDM_BASE: u64 = 0xFFFF_0000_0000_0000;
const UART_VIRT: u64 = HHDM_BASE + 0x0900_0000;
unsafe {
let ptr = UART_BASE as *mut u8;
let ptr = UART_VIRT as *mut u8;
core::ptr::write_volatile(ptr, c);
}
}
@@ -328,6 +330,16 @@ fn switch_to_thread_arm64(thread_id: u64, frame: &mut Aarch64ExceptionFrame) {
// Kernel threads and userspace threads blocked in syscall both need
// kernel context restoration (they're running in kernel mode)
setup_kernel_thread_return_arm64(thread_id, frame);

// CRITICAL: For userspace threads blocked in syscall, set up TTBR0 so
// the correct process page table is active when the syscall completes
// and returns to userspace. Without this, TTBR0 retains the previously-
// running process's page table, causing instruction aborts when the
// thread returns to EL0 with the wrong address space.
if is_blocked_in_syscall && !is_kernel_thread {
set_next_ttbr0_for_thread(thread_id);
switch_ttbr0_if_needed(thread_id);
}
} else {
restore_userspace_context_arm64(thread_id, frame);
}
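The UART fix above works because the higher-half direct map lives under TTBR1, which stays installed while TTBR0 is swapped between processes. A small sketch of the translation it relies on (helper names invented; the HHDM_BASE value is the one from the diff):

```rust
// HHDM translation as used by raw_uart_char: physical addresses are
// reachable at HHDM_BASE + phys via TTBR1, so MMIO through the direct
// map is safe mid-context-switch, unlike a raw physical address.
const HHDM_BASE: u64 = 0xFFFF_0000_0000_0000;

#[inline]
fn hhdm_mmio(phys: u64) -> *mut u8 {
    (HHDM_BASE + phys) as *mut u8
}

fn uart_putc(c: u8) {
    // QEMU virt PL011 UART data register at physical 0x0900_0000.
    unsafe { core::ptr::write_volatile(hhdm_mmio(0x0900_0000), c) }
}
```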
132 changes: 89 additions & 43 deletions kernel/src/arch_impl/aarch64/syscall_entry.S
@@ -66,36 +66,62 @@ syscall_entry_from_el0:
* ARM64 uses SP_EL1 for kernel exceptions, but we need to
* ensure we're using a proper kernel stack for this thread.
*
* Read kernel_stack_top from per-CPU data via TPIDR_EL1.
* CRITICAL: We must save x9/x10 BEFORE using them as scratch
* for the per-CPU lookup. Otherwise userspace gets kernel
* addresses leaked into x9/x10 on syscall return.
*
* Strategy: temporarily push x9/x10 to the current SP_EL1 stack,
* do the per-CPU lookup, then either:
* - No switch: pop x9/x10 and save normally
* - Switch: copy stashed x9/x10 from old stack into new frame
*/
mrs x9, tpidr_el1 /* x9 = per-CPU base */
cbz x9, .Lno_percpu_switch /* Skip if not initialized */
stp x9, x10, [sp, #-16]! /* Stash user x9/x10 on current stack */

/* Read kernel_stack_top from offset 16 */
mrs x9, tpidr_el1 /* x9 = per-CPU base */
cbz x9, .Lno_stack_switch
ldr x10, [x9, #16] /* x10 = kernel_stack_top */
cbz x10, .Lno_percpu_switch /* Skip if not set */

/* Use the kernel stack */
mov sp, x10
cbz x10, .Lno_stack_switch

.Lno_percpu_switch:
/*
* Allocate exception frame on stack.
* Frame size: 272 bytes (34 * 8, 16-byte aligned)
* This matches Aarch64ExceptionFrame layout.
* Stack switch needed: x10 = new kernel stack top.
* User x9/x10 are stashed at [sp] on the OLD stack.
* Save old SP so we can copy them into the new frame.
*/
mov x9, sp /* x9 = old SP (stash location) */
mov sp, x10 /* Switch to per-CPU kernel stack */
sub sp, sp, #272 /* Allocate exception frame */

/* Copy real user x9/x10 from old stack stash into frame */
ldr x10, [x9] /* x10 = user x9 */
str x10, [sp, #72] /* frame.x9 = user x9 */
ldr x10, [x9, #8] /* x10 = user x10 */
str x10, [sp, #80] /* frame.x10 = user x10 */
/* x8, x11 were NOT clobbered in this path */
str x8, [sp, #64] /* frame.x8 = syscall number */
str x11, [sp, #88] /* frame.x11 = user x11 */
b .Lsave_common

.Lno_stack_switch:
/*
* No stack switch needed. Pop stashed x9/x10 (restoring
* their real user values) and save normally.
*/
sub sp, sp, #272
ldp x9, x10, [sp], #16 /* Pop user x9/x10 from stash */
sub sp, sp, #272 /* Allocate exception frame */
stp x8, x9, [sp, #64] /* frame.x8 = syscall number, frame.x9 */
stp x10, x11, [sp, #80] /* frame.x10, frame.x11 */

.Lsave_common:
/*
* Save all general-purpose registers x0-x29.
* STP stores two 64-bit registers per instruction.
* Save all other general-purpose registers.
* x8-x11 are already saved by the path-specific code above.
* All other registers are unclobbered in both paths.
*/
stp x0, x1, [sp, #0]
stp x2, x3, [sp, #16]
stp x4, x5, [sp, #32]
stp x6, x7, [sp, #48]
stp x8, x9, [sp, #64] /* x8 = syscall number */
stp x10, x11, [sp, #80]
/* x8-x11 already saved above */
stp x12, x13, [sp, #96]
stp x14, x15, [sp, #112]
stp x16, x17, [sp, #128]
@@ -175,6 +201,18 @@ syscall_entry_from_el0:
str w10, [x9, #32]
.Lskip_preempt_set:

/*
* Pre-set user_rsp_scratch = sp + 272 (the return SP if no context switch).
* If a context switch occurs, the Rust code will overwrite this with the
* new thread's SP. This mirrors the IRQ handler's approach in boot.S.
* PERCPU_USER_RSP_SCRATCH_OFFSET = 40
*/
mrs x9, tpidr_el1
cbz x9, .Lskip_rsp_scratch_set
add x10, sp, #272
str x10, [x9, #40]
.Lskip_rsp_scratch_set:

/*
* Check if rescheduling is needed before returning to userspace.
* Pass frame pointer for potential context switch.
@@ -285,37 +323,45 @@ syscall_entry_from_el0:
.Lno_ttbr_switch:

/*
* Trace: about to return to userspace.
* Call trace function BEFORE restoring x0/x1 (it will clobber them).
* Pass ELR and SPSR for debugging if needed.
*/
mrs x0, elr_el1
mrs x1, spsr_el1
bl trace_eret_to_el0

/* Now restore x0/x1 from the frame (frame still valid on kernel stack) */
ldp x0, x1, [sp, #0]

/* Deallocate frame */
add sp, sp, #272
/*
* Clear PREEMPT_ACTIVE now that registers are restored.
* Without this, PREEMPT_ACTIVE persists and blocks scheduling.
* Clear PREEMPT_ACTIVE before restoring x0/x1.
* Use x0/x1 as scratch (they haven't been restored yet).
* Must clear BEFORE ERET so next IRQ can do context switches.
*/
mrs x9, tpidr_el1
cbz x9, .Lskip_preempt_final_clear
ldr w10, [x9, #32]
bic w10, w10, #0x10000000 /* Clear bit 28 */
str w10, [x9, #32]
mrs x0, tpidr_el1
cbz x0, .Lskip_preempt_final_clear
ldr w1, [x0, #32]
bic w1, w1, #0x10000000 /* Clear bit 28 */
str w1, [x0, #32]
.Lskip_preempt_final_clear:

/*
* Return to userspace via ERET.
* ERET will:
* - Restore PSTATE from SPSR_EL1
* - Jump to ELR_EL1
* - Switch to EL0
* Restore x0/x1 and switch SP for ERET.
*
* Challenge: we need to set SP = user_rsp_scratch AND restore x0/x1
* from the frame, but after changing SP the frame may not be addressable
* (if a context switch moved us to a different kernel stack).
*
* Solution: save frame.x0 to per-CPU scratch (offset 96), restore x1
* from the frame, switch SP, then restore x0 from per-CPU scratch.
*
* x0 = tpidr_el1 from PREEMPT_ACTIVE clear above.
*/

/* Save frame.x0 to per-CPU ERET scratch */
ldr x1, [sp, #0] /* x1 = frame.x0 */
str x1, [x0, #96] /* percpu.eret_scratch = frame.x0 */

/* Restore x1 from frame (final value) */
ldr x1, [sp, #8] /* x1 = frame.x1 */

/* Set SP from user_rsp_scratch */
ldr x0, [x0, #40] /* x0 = user_rsp_scratch */
mov sp, x0 /* SP = correct kernel stack top */

/* Restore x0 from per-CPU ERET scratch */
mrs x0, tpidr_el1
ldr x0, [x0, #96] /* x0 = saved frame.x0 */

eret

/* Should never reach here - debug marker */
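Both ERET paths (here and in boot.S) index the exception frame by fixed byte offsets: frame.x{n} sits at 8*n, so x0 is at 0, x8 at 64, x16 at 128, x17 at 136. Below is a hedged Rust reconstruction of that layout; the order of the three tail words is an assumption, since the boot.S comment only says "33 registers × 8 bytes + 8 padding".

```rust
// Reconstructed sketch of Aarch64ExceptionFrame as addressed by the
// stp/ldp offsets in this diff. Only the GPR offsets are confirmed;
// the tail ordering (ELR, SPSR, padding) is assumed.
#[allow(dead_code)]
#[repr(C, align(16))]
struct Aarch64ExceptionFrameSketch {
    x: [u64; 31],  // x0..x30 at offsets 0..=240 (x9 = 72, x16 = 128)
    elr_el1: u64,  // offset 248 (assumed position)
    spsr_el1: u64, // offset 256 (assumed position)
    _pad: u64,     // offset 264; pads the frame to 272 bytes
}

const _: () = assert!(core::mem::size_of::<Aarch64ExceptionFrameSketch>() == 272);
```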
15 changes: 11 additions & 4 deletions kernel/src/arch_impl/aarch64/syscall_entry.rs
@@ -1165,9 +1165,7 @@ fn sys_exec_aarch64(
// Trace: ELF file loaded from filesystem
super::trace::trace_exec(b'L');

let boxed_slice = elf_vec.into_boxed_slice();
let elf_data = Box::leak(boxed_slice) as &'static [u8];
let leaked_name: &'static str = Box::leak(program_name.into_boxed_str());
let elf_data = elf_vec.as_slice();

let current_pid = {
let manager_guard = crate::process::manager();
@@ -1202,7 +1200,7 @@
// Trace: calling exec_process_with_argv (process manager)
super::trace::trace_exec(b'M');

match manager.exec_process_with_argv(current_pid, elf_data, Some(leaked_name), &argv_slices) {
match manager.exec_process_with_argv(current_pid, elf_data, Some(&program_name), &argv_slices) {
Ok((new_entry_point, new_rsp)) => {
// Trace: exec_process_with_argv succeeded
super::trace::trace_exec(b'S');
Expand Down Expand Up @@ -1277,6 +1275,15 @@ fn sys_exec_aarch64(
}
// Trace: TTBR0 page table switched
super::trace::trace_exec(b'P');

// CRITICAL: Update saved_process_cr3 so the assembly ERET
// path doesn't restore the OLD (now-freed) page table.
// Without this, the .Lrestore_saved_ttbr path in syscall_entry.S
// switches TTBR0 back to the pre-exec page table, which has
// been deallocated by exec_process_with_argv.
unsafe {
Aarch64PerCpu::set_saved_process_cr3(new_ttbr0);
}
}
}

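The first hunk replaces two Box::leak calls with plain borrows: previously every exec permanently leaked both the ELF image and the program name just to manufacture &'static references. A minimal before/after sketch, with the callee stubbed (the real exec_process_with_argv must accept non-'static borrows for this to compile):

```rust
// `load` stands in for exec_process_with_argv taking borrowed data.
fn load(_elf: &[u8], _name: &str) { /* ... */ }

fn exec(elf_vec: Vec<u8>, program_name: String) {
    // Before: leaked on every exec to fake 'static lifetimes.
    //   let elf_data: &'static [u8] = Box::leak(elf_vec.into_boxed_slice());
    //   let name: &'static str = Box::leak(program_name.into_boxed_str());
    // After: borrow for the duration of the call; both buffers are
    // dropped when this function returns, so nothing leaks per exec.
    load(elf_vec.as_slice(), &program_name);
}
```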
6 changes: 3 additions & 3 deletions kernel/src/main_aarch64.rs
@@ -421,11 +421,11 @@ pub extern "C" fn kernel_main() -> ! {
// Try to load and run userspace init_shell from ext2 or test disk
if device_count > 0 {
boot_raw_char(b'2'); // Inside if
serial_println!("[boot] Loading userspace init_shell from ext2...");
serial_println!("[boot] Loading userspace init from ext2...");
boot_raw_char(b'3'); // After serial_println
match run_userspace_from_ext2("/bin/init_shell") {
match run_userspace_from_ext2("/sbin/init") {
Err(e) => {
serial_println!("[boot] Failed to load init_shell from ext2: {}", e);
serial_println!("[boot] Failed to load init from ext2: {}", e);
serial_println!("[boot] Loading userspace init_shell from test disk...");
match kernel::boot::test_disk::run_userspace_from_disk("init_shell") {
Err(e) => {
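The boot path now tries the conventional /sbin/init location first and falls back to the test disk. If more candidates were ever wanted, the nested matches could become a probe loop; a hypothetical sketch, assuming run_userspace_from_ext2 returns Result<(), E> with a displayable error:

```rust
// Hypothetical probe over init candidates; the PR itself only tries
// "/sbin/init" on ext2 before falling back to the embedded test disk.
fn try_init_candidates() -> bool {
    for path in ["/sbin/init", "/bin/init", "/bin/init_shell"] {
        match run_userspace_from_ext2(path) {
            Ok(()) => return true,
            Err(e) => serial_println!("[boot] {} failed: {}", path, e),
        }
    }
    false
}
```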