diff --git a/.ci/run-app-tests.sh b/.ci/run-app-tests.sh index 27db98c1..a8f58002 100755 --- a/.ci/run-app-tests.sh +++ b/.ci/run-app-tests.sh @@ -26,7 +26,10 @@ test_app() { exit_code=$? # Check phase - if echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then + # Filter out expected PMP termination messages before crash detection + local filtered_output + filtered_output=$(echo "$output" | grep -v "\[PMP\] Task terminated") + if echo "$filtered_output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then echo "[!] Crash detected" return 1 elif [ $exit_code -eq 124 ] || [ $exit_code -eq 0 ]; then diff --git a/.ci/run-functional-tests.sh b/.ci/run-functional-tests.sh index e66de5af..e4c3911d 100755 --- a/.ci/run-functional-tests.sh +++ b/.ci/run-functional-tests.sh @@ -11,7 +11,7 @@ TOOLCHAIN_TYPE=${TOOLCHAIN_TYPE:-gnu} declare -A FUNCTIONAL_TESTS FUNCTIONAL_TESTS["mutex"]="Fairness: PASS,Mutual Exclusion: PASS,Data Consistency: PASS,Overall: PASS" FUNCTIONAL_TESTS["semaphore"]="Overall: PASS" -FUNCTIONAL_TESTS["umode"]="PASS: sys_tid() returned,PASS: sys_uptime() returned,[EXCEPTION] Illegal instruction" +FUNCTIONAL_TESTS["umode"]="[PASS] returned tid=,[PASS] returned uptime=,[EXCEPTION] Illegal instruction" #FUNCTIONAL_TESTS["test64"]="Unsigned Multiply: PASS,Unsigned Divide: PASS,Signed Multiply: PASS,Signed Divide: PASS,Left Shifts: PASS,Logical Right Shifts: PASS,Arithmetic Right Shifts: PASS,Overall: PASS" #FUNCTIONAL_TESTS["suspend"]="Suspend: PASS,Resume: PASS,Self-Suspend: PASS,Overall: PASS" diff --git a/Documentation/hal-riscv-context-switch.md b/Documentation/hal-riscv-context-switch.md index d274bae8..e0ae2b31 100644 --- a/Documentation/hal-riscv-context-switch.md +++ b/Documentation/hal-riscv-context-switch.md @@ -165,7 +165,7 @@ void *hal_build_initial_frame(void *stack_top, frame[FRAME_EPC] = (uint32_t) task_entry; /* SP value for when ISR returns (stored in frame[33]). - * For U-mode: Set to user stack top. + * For U-mode: Set to user stack top minus 256-byte guard zone. * For M-mode: Set to frame + ISR_STACK_FRAME_SIZE. */ if (user_mode && kernel_stack) { diff --git a/Documentation/pmp-memory-protection.md b/Documentation/pmp-memory-protection.md new file mode 100644 index 00000000..65a40c75 --- /dev/null +++ b/Documentation/pmp-memory-protection.md @@ -0,0 +1,305 @@ +# PMP: Memory Protection + +## Overview + +Linmo operates entirely in Machine mode by default, with all tasks sharing the same physical address space. +A misbehaving task can corrupt kernel data structures or interfere with other tasks, compromising system stability. + +Physical Memory Protection provides hardware-enforced access control at the physical address level. +Unlike an MMU, PMP requires no page tables or TLB management, making it suitable for resource-constrained RISC-V systems. +PMP enforces read, write, and execute permissions for up to 16 configurable memory regions. + +The design draws inspiration from the F9 microkernel, adopting a three-layer abstraction: +- **Memory Pools** define static physical regions at boot time, derived from linker symbols. +- **Flexpages** represent dynamically protected memory ranges with associated permissions. +- **Memory Spaces** group flexpages into per-task protection domains. 
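+
+For orientation, the sketch below shows a `mempool_t` descriptor and `DECLARE_MEMPOOL()` helper consistent with how `pmp_init_pools()` consumes them later in this patch (`pool->start`, `pool->end`, `pool->flags`, `pool->tag`). The exact field names and types here are assumptions for illustration; the authoritative definitions live in the PMP headers (presumably `arch/riscv/pmp.h`).
+
+```c
+/* Hypothetical sketch - not the authoritative definition */
+typedef struct {
+    const char *name; /* Human-readable region name */
+    uint32_t start;   /* Physical start address (inclusive) */
+    uint32_t end;     /* Physical end address (exclusive) */
+    uint32_t flags;   /* PMPCFG_PERM_* permission bits */
+    uint32_t tag;     /* PMP_PRIORITY_* class used for eviction */
+} mempool_t;
+
+#define DECLARE_MEMPOOL(nm, start_sym, end_sym, fl, tg)          \
+    {                                                            \
+        .name = (nm), .start = (uint32_t) (start_sym),           \
+        .end = (uint32_t) (end_sym), .flags = (fl), .tag = (tg), \
+    }
+```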
+ +## Architecture + +### Memory Abstraction Layers + +```mermaid +graph TD + classDef hw fill:#424242,stroke:#000,color:#fff,stroke-width:2px + classDef static fill:#e1f5fe,stroke:#01579b,stroke-width:2px + classDef dynamic fill:#fff3e0,stroke:#e65100,stroke-width:2px + classDef container fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px + classDef task fill:#f3e5f5,stroke:#6a1b9a,stroke-width:2px + + subgraph L0 ["Hardware"] + PMP[PMP Registers]:::hw + end + + subgraph L1 ["Memory Pools"] + MP["Static Regions
(.text, .data, .bss)"]:::static + end + + subgraph L2 ["Flexpages"] + FP["fpage_t
base / size / rwx"]:::dynamic + end + + subgraph L3 ["Memory Spaces"] + AS["memspace_t
per-task domain"]:::container + end + + subgraph L4 ["Task"] + TCB[TCB]:::task + end + + TCB -->|owns| AS + AS -->|contains| FP + MP -->|initializes| FP + AS -->|configures| PMP +``` + +The core structures: + +```c +typedef struct fpage { + struct fpage *as_next; /* Next in address space list */ + struct fpage *map_next; /* Next in mapping chain */ + struct fpage *pmp_next; /* Next in PMP queue */ + uint32_t base; /* Physical base address */ + uint32_t size; /* Region size */ + uint32_t rwx; /* R/W/X permission bits */ + uint32_t pmp_id; /* PMP region index */ + uint32_t flags; /* Status flags */ + uint32_t priority; /* Eviction priority */ + int used; /* Usage counter */ +} fpage_t; +``` +```c +typedef struct memspace { + uint32_t as_id; /* Memory space identifier */ + struct fpage *first; /* Head of flexpage list */ + struct fpage *pmp_first; /* Head of PMP-loaded list */ + struct fpage *pmp_stack; /* Stack regions */ + uint32_t shared; /* Shared flag */ +} memspace_t; +``` + +### TOR Mode and Paired Entries + +TOR (Top Of Range) mode defines region *i* as `[pmpaddr[i-1], pmpaddr[i])`. +This works well for contiguous kernel regions where boundaries naturally chain together. + +For dynamically allocated user regions at arbitrary addresses, Linmo uses paired entries: + +``` +┌─────────────────────────────────────────┐ +│ Entry N: base_addr (disabled) │ +│ Entry N+1: top_addr (TOR, R|W) │ +│ │ +│ Region N+1 = [base_addr, top_addr) │ +└─────────────────────────────────────────┘ +``` + +The first entry sets the lower bound with permissions disabled. +The second entry defines the upper bound with TOR mode and the desired permissions. +This consumes two hardware slots per user region but allows non-contiguous regions at arbitrary addresses. + +### Kernel and User Regions + +Kernel regions protect `.text`, `.data`, and `.bss` sections: + +```c +static const mempool_t kernel_mempools[] = { + DECLARE_MEMPOOL("kernel_text", + &_stext, &_etext, + PMPCFG_PERM_RX, + PMP_PRIORITY_KERNEL), + DECLARE_MEMPOOL("kernel_data", + &_sdata, &_edata, + PMPCFG_PERM_RW, + PMP_PRIORITY_KERNEL), + DECLARE_MEMPOOL("kernel_bss", + &_sbss, &_ebss, + PMPCFG_PERM_RW, + PMP_PRIORITY_KERNEL), +}; +``` + +Kernel heap and stack are intentionally excluded—PMP is ineffective for M-mode, and kernel heap/stack is only used in M-mode. +This keeps Regions 0-2 for kernel, leaving Region 3+ available for user dynamic regions with correct TOR address ordering. + +Kernel regions use a hybrid lock strategy: + +| Lock Type | Location | Effect | +|-----------|---------------------------|-------------------------| +| Software | `regions[i].locked = 1` | Allocator skips slot | +| Hardware | `PMPCFG_L` NOT set | M-mode access preserved | + +Setting the hardware lock bit would deny M-mode access. + +User regions protect task stacks and are dynamically loaded during context switches. +When PMP slots are exhausted, user regions can be evicted and reloaded on demand. + +## Memory Isolation + +### Context Switching + +Context switching reconfigures PMP in two phases: + +```mermaid +flowchart LR + subgraph Eviction + E1[Iterate pmp_first] --> E2[Disable region in hardware] + E2 --> E3["Set pmp_id = INVALID"] + end + subgraph Loading + L1[Reset pmp_first = NULL] --> L2{Already loaded?} + L2 -->|Yes| L3[Add to tracking list] + L2 -->|No| L4[Find free slot] + L4 --> L5[Load to hardware] + L5 --> L3 + end + Eviction --> Loading +``` + +**Eviction phase** iterates the outgoing task's `pmp_first` linked list. 
+Each flexpage is disabled in hardware, and `pmp_id` is set to `PMP_INVALID_REGION (0xFF)` to mark it as unloaded. + +**Loading phase** rebuilds `pmp_first` from scratch. +This prevents circular references—if `pmp_first` is not cleared, reloading a flexpage could create a self-loop in the linked list. +For each flexpage in the incoming task's memory space: +- **Already loaded** (shared regions): Add directly to tracking list +- **Not loaded**: Find a free slot via `find_free_region_slot()` and load + +If all slots are occupied, remaining regions load on-demand through the fault handler (lazy loading). + +### Per-Task Kernel Stack + +U-mode trap handling requires a kernel stack to save context. +If multiple U-mode tasks share a single kernel stack, Task A's context frame is overwritten when Task B traps—the ISR writes to the same position on the shared stack. + +Linmo allocates a dedicated 512-byte kernel stack for each U-mode task: + +```c +typedef struct tcb { + /* ... */ + void *kernel_stack; /* Base address of kernel stack (NULL for M-mode) */ + size_t kernel_stack_size; /* Size of kernel stack in bytes (0 for M-mode) */ +} tcb_t; +``` + +M-mode tasks do not require a separate kernel stack—they use the task stack directly without privilege transition. + +During context switch, the scheduler saves the incoming task's kernel stack top to a global variable. +The ISR restore path loads this value into `mscratch`, enabling the next U-mode trap to use the correct per-task kernel stack. + +### Fault Handling and Task Termination + +PMP access faults occur when a U-mode task attempts to access memory outside its loaded regions. +The trap handler routes these faults to the PMP fault handler, which attempts recovery or terminates the task. + +The fault handler first searches the task's memory space for a flexpage containing the faulting address. +If found and the flexpage is not currently loaded in hardware, it loads the region and returns to the faulting instruction. +This enables lazy loading—regions not loaded during context switch are loaded on first access. + +If no matching flexpage exists, the access is unauthorized (e.g., kernel memory or another task's stack). +If the flexpage is already loaded but still faulted, recovery is impossible. +In either case, the handler marks the task as `TASK_ZOMBIE` and returns a termination code. + +```mermaid +flowchart TD + A[Find flexpage for fault_addr] --> B{Flexpage found?} + B -->|No| F[Unauthorized access] + B -->|Yes| C{Already loaded in hardware?} + C -->|No| D[Load to hardware] + D --> E[Return RECOVERED] + C -->|Yes| F + F --> G[Mark TASK_ZOMBIE] + G --> H[Return TERMINATE] +``` + +The trap handler interprets the return value: + +| Return Code | Action | +|-------------------------|-----------------------------------------------| +| `PMP_FAULT_RECOVERED` | Resume execution at faulting instruction | +| `PMP_FAULT_TERMINATE` | Print diagnostic, invoke dispatcher | +| `PMP_FAULT_UNHANDLED` | Fall through to default exception handler | + +Terminated tasks are not immediately destroyed. +The dispatcher calls a cleanup routine before selecting the next runnable task. +This routine iterates zombie tasks, evicts their PMP regions, frees their memory spaces and stacks, and removes them from the task list. +Deferring cleanup to the dispatcher avoids modifying task structures from within interrupt context. + +## Best Practices + +### Hardware Limitations + +PMP provides 16 hardware slots shared between kernel and user regions. 
+Kernel regions occupy slots 0-2 and cannot be evicted. +Each user region requires two slots (paired entries for TOR mode). + +| Resource | Limit | +|-----------------------------|----------------------------| +| Total PMP slots | 16 | +| Kernel slots | 3 (fixed at boot) | +| Slots per user region | 2 (paired entries) | +| Max concurrent user regions | 6 (13 ÷ 2) | + +With only 6 concurrent user regions, systems spawning many U-mode tasks rely on lazy loading through the fault handler. +This incurs runtime overhead as regions are dynamically reloaded during context switches. +Applications with many concurrent U-mode tasks should consider this tradeoff between isolation granularity and performance. + +### Task Creation Guidelines + +U-mode tasks receive automatic PMP protection. +The kernel allocates a memory space and registers the task stack as a protected flexpage. + +Applications use `mo_task_spawn()`, which automatically creates tasks in the appropriate privilege mode based on the build configuration: + +```c +/* Standard usage - automatically inherits parent's privilege mode */ +mo_task_spawn(task_func, stack_size); +``` + +Privileged applications may need explicit control over task privilege modes, such as when testing mixed-privilege scheduling or implementing system services that manage both trusted and isolated tasks. +For these scenarios, use the explicit creation functions: + +```c +/* Explicitly create M-mode task: trusted code, full memory access */ +mo_task_spawn_kernel(task_func, stack_size); + +/* Explicitly create U-mode task: isolated execution, PMP protected */ +mo_task_spawn_user(task_func, stack_size); +``` + +Choose the appropriate privilege level: +- **M-mode**: Trusted kernel components requiring unrestricted memory access +- **U-mode**: Application tasks where memory isolation is desired + +### Common Pitfalls + +1. Assuming PMP protects the kernel + + PMP only restricts Supervisor and User modes. + Machine mode has unrestricted access regardless of PMP configuration. + This is intentional—the kernel must access all memory to manage protection. + + ```c + /* This code in M-mode bypasses PMP entirely */ + void kernel_func(void) { + volatile uint32_t *user_stack = (uint32_t *)0x80007000; + *user_stack = 0; /* No fault—M-mode ignores PMP */ + } + ``` + + PMP protects user tasks from each other but does not protect the kernel from itself. + +2. Exhausting PMP slots + + With only ~6 user regions available, spawning many U-mode tasks causes PMP slot exhaustion. + Subsequent tasks rely entirely on lazy loading, degrading performance. + +3. Mixing M-mode and U-mode incorrectly + + M-mode tasks spawned with `mo_task_spawn()` do not receive memory spaces. + PMP-related functions check for NULL memory spaces and return early, so calling them on M-mode tasks has no effect. 
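+
+   The guard that makes this harmless is visible in this patch: PMP entry points bail out when the current task carries no memory space. An abridged excerpt from `pmp_handle_access_fault()` in `arch/riscv/pmp.c`:
+
+   ```c
+   /* Abridged: M-mode tasks have mspace == NULL, so the fault handler
+    * declines to recover and falls through to the default exception
+    * path instead of loading any PMP region. */
+   tcb_t *current = (tcb_t *) kcb->task_current->data;
+   memspace_t *mspace = current->mspace;
+   if (!mspace)
+       return PMP_FAULT_UNHANDLED;
+   ```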
+ +## References + +- [RISC-V Privileged Architecture](https://riscv.github.io/riscv-isa-manual/snapshot/privileged/) +- [Memory Protection for Embedded RISC-V Systems](https://nva.sikt.no/registration/0198eb345173-b2a7ef5c-8e7e-4b98-bd3e-ff9c469ce36d) diff --git a/Makefile b/Makefile index a4bf6da1..447c26b7 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,8 @@ BUILD_LIB_DIR := $(BUILD_DIR)/lib # All other apps run in U-mode by default (secure) MMODE_APPS := cond coop cpubench echo hello mqueues mutex \ pipes pipes_small pipes_struct prodcons progress \ - rtsched semaphore suspend test64 test_libc timer timer_kill + rtsched semaphore suspend test64 test_libc timer timer_kill \ + privilege_switch # Auto-detect: if building an M-mode app, enable CONFIG_PRIVILEGED ifneq ($(filter $(MAKECMDGOALS),$(MMODE_APPS)),) @@ -28,7 +29,7 @@ include arch/$(ARCH)/build.mk INC_DIRS += -I $(SRC_DIR)/include \ -I $(SRC_DIR)/include/lib -KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o main.o +KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o memprot.o main.o KERNEL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(KERNEL_OBJS)) deps += $(KERNEL_OBJS:%.o=%.o.d) @@ -40,7 +41,7 @@ deps += $(LIB_OBJS:%.o=%.o.d) APPS := coop echo hello mqueues semaphore mutex cond \ pipes pipes_small pipes_struct prodcons progress \ rtsched suspend test64 timer timer_kill \ - cpubench test_libc umode + cpubench test_libc umode privilege_switch pmp # Output files for __link target IMAGE_BASE := $(BUILD_DIR)/image diff --git a/app/mutex.c b/app/mutex.c index c332687d..f804d9c2 100644 --- a/app/mutex.c +++ b/app/mutex.c @@ -133,9 +133,8 @@ void monitor_task(void) mo_task_yield(); } - /* Wait a bit for tasks to fully complete */ - for (int i = 0; i < 50; i++) - mo_task_yield(); + /* Wait a bit for tasks to fully complete and logs to flush */ + sys_tdelay(100); /* Final report */ printf("\n=== FINAL RESULTS ===\n"); @@ -190,10 +189,10 @@ int32_t app_main(void) printf("Binary semaphore created successfully\n"); - /* Create tasks */ - int32_t task_a_id = mo_task_spawn(task_a, 1024); - int32_t task_b_id = mo_task_spawn(task_b, 1024); - int32_t monitor_id = mo_task_spawn(monitor_task, 1024); + /* Create tasks with larger stack for PMP/printf overhead */ + int32_t task_a_id = mo_task_spawn(task_a, 2048); + int32_t task_b_id = mo_task_spawn(task_b, 2048); + int32_t monitor_id = mo_task_spawn(monitor_task, 2048); int32_t idle_id = mo_task_spawn(idle_task, 512); if (task_a_id < 0 || task_b_id < 0 || monitor_id < 0 || idle_id < 0) { diff --git a/app/pmp.c b/app/pmp.c new file mode 100644 index 00000000..ef659f81 --- /dev/null +++ b/app/pmp.c @@ -0,0 +1,322 @@ +/* PMP Memory Isolation Test + * + * Validates PMP-based memory protection implementation. 
+ * + * Test Suite: + * Test 1: Context Switch & Stack Integrity + * - Validates PMP correctly isolates task stacks during context + * switches + * - Runs to completion, reports PASS/FAIL + * + * Test 2: Kernel Protection (Destructive) + * - Validates U-mode cannot write to kernel memory + * - Triggers PMP fault and task termination + * + * Test 3: Inter-Task Isolation (Destructive) + * - Validates U-mode cannot access another task's stack + * - Triggers PMP fault and task termination + */ + +#include + +/* Test configuration */ +#define MAX_ITERATIONS 10 +#define STACK_MAGIC_A 0xAAAAAAAA +#define STACK_MAGIC_B 0xBBBBBBBB +#define STACK_MAGIC_C 0xCCCCCCCC + +/* Test state tracking */ +static volatile int tests_passed = 0; +static volatile int tests_failed = 0; +static volatile int tasks_completed = 0; + +/* Cross-task attack: Task B exports its stack address for attacker task */ +static volatile uint32_t *task_b_stack_addr = NULL; + +/* External kernel symbols */ +extern uint32_t _stext, _etext; +extern uint32_t _sdata, _edata; + +/* ======================================================================== + * Test 1: Context Switch & Stack Integrity Check + * ======================================================================== */ + +/* Task A: Stack integrity validation with magic value 0xAAAAAAAA */ +void task_a_integrity(void) +{ + /* Allocate critical data on stack */ + volatile uint32_t stack_guard = STACK_MAGIC_A; + volatile uint32_t iteration_count = 0; + + for (int i = 0; i < MAX_ITERATIONS; i++) { + iteration_count = i + 1; + + sys_tyield(); + + /* Verify stack integrity */ + if (stack_guard != STACK_MAGIC_A) { + umode_printf( + "[Task A] FAIL: Stack corrupted! " + "Expected 0x%08x, got 0x%08x at iteration %d\n", + (unsigned int) STACK_MAGIC_A, (unsigned int) stack_guard, + (int) iteration_count); + tests_failed++; + tasks_completed++; + while (1) + sys_tyield(); + } + + /* Verify iteration counter */ + if (iteration_count != (uint32_t) (i + 1)) { + umode_printf("[Task A] FAIL: Iteration counter corrupted!\n"); + tests_failed++; + tasks_completed++; + while (1) + sys_tyield(); + } + } + + umode_printf("[Task A] PASS: Stack integrity verified across %d switches\n", + MAX_ITERATIONS); + tests_passed++; + tasks_completed++; + + /* Keep task alive */ + while (1) { + for (int i = 0; i < 20; i++) + sys_tyield(); + } +} + +/* Task B: Stack integrity validation with magic value 0xBBBBBBBB */ +void task_b_integrity(void) +{ + volatile uint32_t stack_guard = STACK_MAGIC_B; + volatile uint32_t checksum = 0; + + /* Export stack address for cross-task attack test */ + task_b_stack_addr = &stack_guard; + + for (int i = 0; i < MAX_ITERATIONS; i++) { + checksum += (i + 1); + + sys_tyield(); + + if (stack_guard != STACK_MAGIC_B) { + umode_printf( + "[Task B] FAIL: Stack guard corrupted! " + "Expected 0x%08x, got 0x%08x\n", + (unsigned int) STACK_MAGIC_B, (unsigned int) stack_guard); + tests_failed++; + tasks_completed++; + while (1) + sys_tyield(); + } + + uint32_t expected_checksum = ((i + 1) * (i + 2)) / 2; + if (checksum != expected_checksum) { + umode_printf( + "[Task B] FAIL: Checksum mismatch! 
" + "Expected %u, got %u\n", + (unsigned int) expected_checksum, (unsigned int) checksum); + tests_failed++; + tasks_completed++; + while (1) + sys_tyield(); + } + } + + umode_printf("[Task B] PASS: Stack integrity and checksum verified\n"); + tests_passed++; + tasks_completed++; + + while (1) { + for (int i = 0; i < 20; i++) + sys_tyield(); + } +} + +/* Task C: Stack integrity with array operations */ +void task_c_integrity(void) +{ + volatile uint32_t stack_array[4] = {STACK_MAGIC_C, STACK_MAGIC_C + 1, + STACK_MAGIC_C + 2, STACK_MAGIC_C + 3}; + + for (int i = 0; i < MAX_ITERATIONS; i++) { + sys_tyield(); + + for (int j = 0; j < 4; j++) { + uint32_t expected = STACK_MAGIC_C + j; + if (stack_array[j] != expected) { + umode_printf( + "[Task C] FAIL: Array[%d] corrupted! " + "Expected 0x%08x, got 0x%08x\n", + j, (unsigned int) expected, (unsigned int) stack_array[j]); + tests_failed++; + tasks_completed++; + while (1) + sys_tyield(); + } + } + } + + umode_printf("[Task C] PASS: Stack array integrity verified\n"); + tests_passed++; + tasks_completed++; + + while (1) { + for (int i = 0; i < 20; i++) + sys_tyield(); + } +} + +/* ======================================================================== + * Test 2: Kernel Protection (Destructive - Triggers Fault) + * ======================================================================== */ + +/* U-mode write to kernel memory (triggers PMP fault) */ +void task_kernel_attack(void) +{ + sys_tdelay(50); /* Wait for Test 1 to complete */ + + umode_printf("\n=== Test 2: Kernel Protection ===\n"); + umode_printf("Attempting to write to kernel .text at %p\n", + (void *) &_stext); + umode_printf("Expected: [PMP] Task terminated\n"); + umode_printf("\nResult:\n"); + + sys_tdelay(10); + + volatile uint32_t *kernel_addr = (volatile uint32_t *) &_stext; + *kernel_addr = 0xDEADBEEF; + + /* Should not reach here - PMP should terminate this task */ + umode_printf("FAIL: Successfully wrote to kernel memory!\n"); + tests_failed++; + + while (1) + sys_tyield(); +} + +/* ======================================================================== + * Test 3: Inter-Task Isolation (Destructive - Triggers Fault) + * ======================================================================== */ + +/* U-mode task attempts to read another task's stack (triggers PMP fault) */ +void task_cross_attack(void) +{ + /* Wait for Task B to export its stack address */ + while (!task_b_stack_addr) + sys_tyield(); + + sys_tdelay(70); /* Wait for Test 2 to complete */ + + umode_printf("\n=== Test 3: Inter-Task Isolation ===\n"); + umode_printf("Attempting to read Task B's stack at %p\n", + (void *) task_b_stack_addr); + umode_printf("Expected: [PMP] Task terminated\n"); + umode_printf("\nResult:\n"); + + sys_tdelay(10); + + /* Attempt to read Task B's stack - should trigger PMP fault */ + volatile uint32_t stolen_value = *task_b_stack_addr; + + /* Should not reach here - PMP should terminate this task */ + umode_printf("FAIL: Successfully read Task B's stack! 
Value: 0x%08x\n", + (unsigned int) stolen_value); + tests_failed++; + + while (1) + sys_tyield(); +} + + + +/* ======================================================================== + * Monitor Task + * ======================================================================== */ + +void monitor_task(void) +{ + umode_printf("\n"); + umode_printf("=================================================\n"); + umode_printf(" PMP Memory Isolation Test Suite\n"); + umode_printf("=================================================\n"); + umode_printf("Tests:\n"); + umode_printf(" [Test 1] Context Switch & Stack Integrity\n"); + umode_printf(" [Test 2] Kernel Protection\n"); + umode_printf(" [Test 3] Inter-Task Isolation\n"); + umode_printf("=================================================\n\n"); + + /* Wait for Test 1 tasks to complete */ + int cycles = 0; + while (tasks_completed < 3 && cycles < 200) { + cycles++; + for (int i = 0; i < 10; i++) + sys_tyield(); + } + + /* Report Test 1 results */ + umode_printf("\n=== Test 1: Context Switch & Stack Integrity ===\n"); + umode_printf("Tasks: %d/3, Passed: %d, Failed: %d\n", tasks_completed, + tests_passed, tests_failed); + + if (tasks_completed == 3 && tests_passed == 3 && tests_failed == 0) { + umode_printf("Status: PASS\n\n"); + } else { + umode_printf("Status: FAIL\n\n"); + } + + /* Wait for Test 2 and 3 to complete */ + int failed_before = tests_failed; + sys_tdelay(150); + + /* Verify Test 2/3 results - if tests_failed didn't increase, PMP worked */ + if (tests_failed == failed_before) { + umode_printf("\nStatus: PASS\n"); + } else { + umode_printf("\nStatus: FAIL\n"); + } + + /* Final summary */ + umode_printf("\n=================================================\n"); + if (tests_failed == 0 && tests_passed >= 3) { + umode_printf("ALL PMP TESTS PASSED\n"); + } else { + umode_printf("PMP TESTS FAILED: %d test(s) failed\n", tests_failed); + } + umode_printf("=================================================\n"); + + while (1) { + for (int i = 0; i < 50; i++) + sys_tyield(); + } +} + +/* ======================================================================== + * Application Entry Point + * ======================================================================== */ + +int32_t app_main(void) +{ + /* Create Test 1 tasks - Context Switch & Stack Integrity */ + int32_t task_a = mo_task_spawn(task_a_integrity, 1024); + int32_t task_b = mo_task_spawn(task_b_integrity, 1024); + int32_t task_c = mo_task_spawn(task_c_integrity, 1024); + int32_t monitor = mo_task_spawn(monitor_task, 1024); + + /* Test 2: Kernel Protection */ + int32_t kernel_test = mo_task_spawn(task_kernel_attack, 1024); + + /* Test 3: Inter-Task Isolation */ + int32_t cross_test = mo_task_spawn(task_cross_attack, 1024); + + if (task_a < 0 || task_b < 0 || task_c < 0 || monitor < 0 || + kernel_test < 0 || cross_test < 0) { + printf("ERROR: Failed to create test tasks\n"); + return false; + } + + return true; /* Enable preemptive scheduling */ +} diff --git a/app/privilege_switch.c b/app/privilege_switch.c new file mode 100644 index 00000000..ce33f40c --- /dev/null +++ b/app/privilege_switch.c @@ -0,0 +1,38 @@ +#include + +/* M-mode task: Continuously delays to test M-mode ecall context switch */ +void mmode_task(void) +{ + int iteration = 0; + while (1) { + CRITICAL_ENTER(); + printf("[M-mode] iteration %d\n", iteration++); + CRITICAL_LEAVE(); + mo_task_delay(2); + } +} + +/* U-mode task: Continuously delays to test U-mode syscall and kernel stack */ +void umode_task(void) +{ + int iteration = 
0; + while (1) { + umode_printf("[U-mode] iteration %d\n", iteration++); + sys_tdelay(2); + } +} + +int32_t app_main(void) +{ + printf("[Kernel] Privilege Mode Switching Test: M-mode <-> U-mode\n"); + + /* This is a privileged test application (M-mode) designed to verify + * mixed-privilege scheduling. We explicitly use internal APIs to spawn + * tasks in specific modes, bypassing the standard mo_task_spawn() macro + * which defaults to the parent's mode. + */ + mo_task_spawn_kernel(mmode_task, DEFAULT_STACK_SIZE); + mo_task_spawn_user(umode_task, DEFAULT_STACK_SIZE); + + return 1; +} diff --git a/arch/riscv/boot.c b/arch/riscv/boot.c index 55ba106f..e30da2b4 100644 --- a/arch/riscv/boot.c +++ b/arch/riscv/boot.c @@ -16,7 +16,7 @@ extern uint32_t _sbss, _ebss; /* C entry points */ void main(void); -void do_trap(uint32_t cause, uint32_t epc); +void do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp); void hal_panic(void); /* Current task's kernel stack top (set by dispatcher, NULL for M-mode tasks) */ diff --git a/arch/riscv/build.mk b/arch/riscv/build.mk index a2cd1e7b..39425b3f 100644 --- a/arch/riscv/build.mk +++ b/arch/riscv/build.mk @@ -75,7 +75,7 @@ LDFLAGS += --gc-sections ARFLAGS = r LDSCRIPT = $(ARCH_DIR)/riscv32-qemu.ld -HAL_OBJS := boot.o hal.o muldiv.o +HAL_OBJS := boot.o hal.o muldiv.o pmp.o HAL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(HAL_OBJS)) deps += $(HAL_OBJS:%.o=%.o.d) diff --git a/arch/riscv/csr.h b/arch/riscv/csr.h index 2f27ed81..081c2c7c 100644 --- a/arch/riscv/csr.h +++ b/arch/riscv/csr.h @@ -179,3 +179,82 @@ /* Machine Scratch Register - For temporary storage during traps */ #define CSR_MSCRATCH 0x340 + +/* PMP Address Registers (pmpaddr0-pmpaddr15) - 16 regions maximum + * In TOR (Top-of-Range) mode, these define the upper boundary of each region. + * The lower boundary is defined by the previous region's upper boundary. + */ +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf + +/* PMP Configuration Registers (pmpcfg0-pmpcfg3) + * Each configuration register controls 4 PMP regions (on RV32). + * pmpcfg0 controls pmpaddr0-3, pmpcfg1 controls pmpaddr4-7, etc. + */ +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 + +/* PMP Configuration Field Bits (8 bits per region within pmpcfg) + * Layout in each byte of pmpcfg: + * Bit 7: L (Lock) - Locks this region until hardware reset + * Bits 6-5: Reserved + * Bits 4-3: A (Address Matching Mode) + * Bit 2: X (Execute permission) + * Bit 1: W (Write permission) + * Bit 0: R (Read permission) + */ + +/* Lock bit: Prevents further modification of this region */ +#define PMPCFG_L (1U << 7) + +/* Address Matching Mode (bits 3-4) + * Choose TOR mode for no alignment requirements on region sizes, and support + * for arbitrary address ranges. 
+ */ +#define PMPCFG_A_SHIFT 3 +#define PMPCFG_A_MASK (0x3U << PMPCFG_A_SHIFT) +#define PMPCFG_A_OFF (0x0U << PMPCFG_A_SHIFT) /* Null region (disabled) */ +#define PMPCFG_A_TOR (0x1U << PMPCFG_A_SHIFT) /* Top-of-Range mode */ + +/* Permission bits */ +#define PMPCFG_X (1U << 2) /* Execute permission */ +#define PMPCFG_W (1U << 1) /* Write permission */ +#define PMPCFG_R (1U << 0) /* Read permission */ + +/* Common permission combinations */ +#define PMPCFG_PERM_NONE (0x0U) /* No access */ +#define PMPCFG_PERM_R (PMPCFG_R) /* Read-only */ +#define PMPCFG_PERM_RW (PMPCFG_R | PMPCFG_W) /* Read-Write */ +#define PMPCFG_PERM_X (PMPCFG_X) /* Execute-only */ +#define PMPCFG_PERM_RX (PMPCFG_R | PMPCFG_X) /* Read-Execute */ +#define PMPCFG_PERM_RWX (PMPCFG_R | PMPCFG_W | PMPCFG_X) /* All access */ + +/* Utility macros for PMP configuration manipulation */ + +/* Extract PMP address matching mode */ +#define PMPCFG_GET_A(cfg) (((cfg) & PMPCFG_A_MASK) >> PMPCFG_A_SHIFT) + +/* Extract permission bits from configuration byte */ +#define PMPCFG_GET_PERM(cfg) ((cfg) & (PMPCFG_R | PMPCFG_W | PMPCFG_X)) + +/* Check if region is locked */ +#define PMPCFG_IS_LOCKED(cfg) (((cfg) & PMPCFG_L) != 0) + +/* Check if region is enabled (address mode is not OFF) */ +#define PMPCFG_IS_ENABLED(cfg) (PMPCFG_GET_A(cfg) != PMPCFG_A_OFF) diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c index 8aa250c5..12aff35a 100644 --- a/arch/riscv/hal.c +++ b/arch/riscv/hal.c @@ -3,6 +3,7 @@ #include #include "csr.h" +#include "pmp.h" #include "private/stdio.h" #include "private/utils.h" @@ -98,6 +99,15 @@ static void *pending_switch_sp = NULL; static uint32_t current_isr_frame_sp = 0; +/* Trap nesting depth counter to prevent inner traps from overwriting + * current_isr_frame_sp. Only the outermost trap should set the ISR frame + * pointer that context switching requires. + * + * Exported to allow trap context detection and avoid unnecessary nested + * trap triggering. + */ +volatile uint32_t trap_nesting_depth = 0; + /* Current task's kernel stack top address for U-mode trap entry. * For U-mode tasks: points to (kernel_stack + kernel_stack_size). * For M-mode tasks: NULL (uses global _stack). @@ -283,27 +293,18 @@ static void uart_init(uint32_t baud) void hal_hardware_init(void) { uart_init(USART_BAUD); + + /* Initialize PMP hardware with kernel memory regions */ + pmp_config_t *pmp_config = pmp_get_config(); + if (pmp_init_kernel(pmp_config) != 0) + hal_panic(); + /* Set the first timer interrupt. Subsequent interrupts are set in ISR */ mtimecmp_w(mtime_r() + (F_CPU / F_TIMER)); /* Install low-level I/O handlers for the C standard library */ _stdout_install(__putchar); _stdin_install(__getchar); _stdpoll_install(__kbhit); - - /* Grant U-mode access to all memory for validation purposes. - * By default, RISC-V PMP denies all access to U-mode, which would cause - * instruction access faults immediately upon task switch. This minimal - * setup allows U-mode tasks to execute and serves as a placeholder until - * the full PMP driver is integrated. 
- */ - uint32_t pmpaddr = -1UL; /* Cover entire address space */ - uint8_t pmpcfg = 0x0F; /* TOR, R, W, X enabled */ - - asm volatile( - "csrw pmpaddr0, %0\n" - "csrw pmpcfg0, %1\n" - : - : "r"(pmpaddr), "r"(pmpcfg)); } /* Halts the system in an unrecoverable state */ @@ -364,11 +365,18 @@ static const char *exc_msg[] = { */ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) { + uint32_t ret_sp; /* Return value - SP to use for context restore */ + /* Reset pending switch at start of every trap */ pending_switch_sp = NULL; - /* Store ISR frame SP so hal_switch_stack() can save it to prev task */ - current_isr_frame_sp = isr_sp; + /* Only the outermost trap sets the ISR frame pointer for context + * switching. Inner traps must not overwrite this value. + */ + if (trap_nesting_depth == 0) { + current_isr_frame_sp = isr_sp; + } + trap_nesting_depth++; if (MCAUSE_IS_INTERRUPT(cause)) { /* Asynchronous Interrupt */ uint32_t int_code = MCAUSE_GET_CODE(cause); @@ -380,6 +388,15 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) mtimecmp_w(mtimecmp_r() + (F_CPU / F_TIMER)); /* Invoke scheduler - parameter 1 = from timer, increment ticks */ dispatcher(1); + + /* Nested traps must return their own SP to unwind properly. + * Only the outermost trap performs context switch restoration. + */ + if (trap_nesting_depth > 1) { + pending_switch_sp = NULL; + ret_sp = isr_sp; + goto trap_exit; + } } else { /* All other interrupt sources are unexpected and fatal */ hal_panic(); @@ -389,18 +406,23 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) /* Handle ecall from U-mode - system calls */ if (code == MCAUSE_ECALL_UMODE) { + /* Extract syscall arguments from ISR frame */ + uint32_t *f = (uint32_t *) isr_sp; + /* Advance mepc past the ecall instruction (4 bytes) */ uint32_t new_epc = epc + 4; write_csr(mepc, new_epc); - /* Extract syscall arguments from ISR frame */ - uint32_t *f = (uint32_t *) isr_sp; - int syscall_num = f[FRAME_A7]; uintptr_t arg1 = (uintptr_t) f[FRAME_A0]; uintptr_t arg2 = (uintptr_t) f[FRAME_A1]; uintptr_t arg3 = (uintptr_t) f[FRAME_A2]; + /* Update frame EPC before syscall dispatch to ensure correct return + * address if nested traps occur during syscall execution. + */ + f[FRAME_EPC] = new_epc; + /* Dispatch to syscall implementation via direct table lookup. * Must use do_syscall here instead of syscall() to avoid recursive * traps, as the user-space syscall() may be overridden with ecall. @@ -410,11 +432,17 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) uintptr_t a3); int retval = do_syscall(syscall_num, arg1, arg2, arg3); - /* Store return value and updated PC */ + /* Store return value */ f[FRAME_A0] = (uint32_t) retval; - f[FRAME_EPC] = new_epc; - return isr_sp; + /* Return new SP if syscall triggered context switch. Nested traps + * return their own SP to properly unwind the call stack. + */ + ret_sp = (trap_nesting_depth > 1) + ? isr_sp + : (pending_switch_sp ? (uint32_t) pending_switch_sp + : isr_sp); + goto trap_exit; } /* Handle ecall from M-mode - used for yielding in preemptive mode */ @@ -435,8 +463,54 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) */ dispatcher(0); - /* Return the SP to use - new task's frame or current frame */ - return pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp; + /* Nested traps must return their own SP to unwind properly. + * Only the outermost trap performs context switch restoration. 
+ * Clear pending switch for nested traps to prevent incorrect + * restoration by outer handlers. + */ + if (trap_nesting_depth > 1) { + pending_switch_sp = NULL; + ret_sp = isr_sp; + } else { + ret_sp = + pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp; + } + goto trap_exit; + } + + /* Attempt to recover load/store access faults. + * + * This assumes all U-mode access faults are PMP-related, which holds + * for platforms without MMU where PMP is the sole memory protection. + * On MCU hardware, bus faults or access to non-existent memory may + * also trigger access exceptions, and terminating the task instead + * of panicking could hide such hardware issues. + */ + if (code == 5 || code == 7) { + uint32_t mtval = read_csr(mtval); + int32_t pmp_result = pmp_handle_access_fault(mtval, code == 7); + if (pmp_result == PMP_FAULT_RECOVERED) { + /* PMP fault handled successfully, return current frame */ + return isr_sp; + } + if (pmp_result == PMP_FAULT_TERMINATE) { + /* Task terminated (marked as zombie), switch to next task. + * Print diagnostic before switching. */ + trap_puts("[PMP] Task terminated: "); + trap_puts(code == 7 ? "Store" : "Load"); + trap_puts(" access fault at 0x"); + for (int i = 28; i >= 0; i -= 4) { + uint32_t nibble = (mtval >> i) & 0xF; + _putchar(nibble < 10 ? '0' + nibble : 'A' + nibble - 10); + } + trap_puts("\r\n"); + + /* Force context switch to next task */ + dispatcher(0); + ret_sp = + pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp; + goto trap_exit; + } } /* Print exception info via direct UART (safe in trap context) */ @@ -457,7 +531,12 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp) } /* Return the SP to use for context restore - new task's frame or current */ - return pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp; + ret_sp = pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp; + +trap_exit: + /* Decrement trap nesting depth before returning */ + trap_nesting_depth--; + return ret_sp; } /* Enables the machine-level timer interrupt source */ diff --git a/arch/riscv/hal.h b/arch/riscv/hal.h index a665d63f..55c0f769 100644 --- a/arch/riscv/hal.h +++ b/arch/riscv/hal.h @@ -3,14 +3,21 @@ #include /* Symbols from the linker script, defining memory boundaries */ -extern uint32_t _gp; /* Global pointer initialized at reset */ -extern uint32_t _stack; /* Kernel stack top for ISR and boot */ -extern uint32_t _heap_start, _heap_end; /* Start/end of the HEAP memory */ -extern uint32_t _heap_size; /* Size of HEAP memory */ +extern uint32_t _gp; /* Global pointer initialized at reset */ +extern uint32_t _stack; /* Kernel stack top for ISR and boot */ +extern uint32_t _stext, _etext; /* Start/end of the .text section */ extern uint32_t _sidata; /* Start address for .data initialization */ extern uint32_t _sdata, _edata; /* Start/end address for .data section */ extern uint32_t _sbss, _ebss; /* Start/end address for .bss section */ extern uint32_t _end; /* End of kernel image */ +extern uint32_t _heap_start, _heap_end; /* Start/end of the HEAP memory */ +extern uint32_t _heap_size; /* Size of HEAP memory */ +extern uint32_t _stack_bottom, _stack_top; /* Bottom/top of the STACK memory */ + +/* Current trap handler nesting depth. Zero when not in trap context, + * increments for each nested trap entry, decrements on exit. + */ +extern volatile uint32_t trap_nesting_depth; /* Read a RISC-V Control and Status Register (CSR). * @reg : The symbolic name of the CSR (e.g., mstatus). 
@@ -28,6 +35,25 @@ extern uint32_t _end; /* End of kernel image */ */ #define write_csr(reg, val) ({ asm volatile("csrw " #reg ", %0" ::"rK"(val)); }) +/* Read CSR by numeric address (for dynamic register selection). + * Used when CSR number is not known at compile-time (e.g., PMP registers). + * @csr_num : CSR address as a compile-time constant. + */ +#define read_csr_num(csr_num) \ + ({ \ + uint32_t __tmp; \ + asm volatile("csrr %0, %1" : "=r"(__tmp) : "i"(csr_num)); \ + __tmp; \ + }) + +/* Write CSR by numeric address (for dynamic register selection). + * Used when CSR number is not known at compile-time (e.g., PMP registers). + * @csr_num : CSR address as a compile-time constant. + * @val : The 32-bit value to write. + */ +#define write_csr_num(csr_num, val) \ + ({ asm volatile("csrw %0, %1" ::"i"(csr_num), "rK"(val)); }) + /* Globally enable or disable machine-level interrupts by setting mstatus.MIE. * @enable : Non-zero to enable, zero to disable. * Returns the previous state of the interrupt enable bit (1 if enabled, 0 if @@ -145,3 +171,10 @@ void hal_cpu_idle(void); /* Default stack size for new tasks if not otherwise specified */ #define DEFAULT_STACK_SIZE 8192 + +/* Physical Memory Protection (PMP) region limit constants */ +#define PMP_MAX_REGIONS 16 /* RISC-V supports 16 PMP regions */ +#define PMP_TOR_PAIRS \ + 8 /* In TOR mode, 16 regions = 8 pairs (uses 2 addrs each) */ +#define MIN_PMP_REGION_SIZE \ + 4 /* Minimum addressable size in TOR mode (4 bytes) */ diff --git a/arch/riscv/pmp.c b/arch/riscv/pmp.c new file mode 100644 index 00000000..a646c928 --- /dev/null +++ b/arch/riscv/pmp.c @@ -0,0 +1,762 @@ +/* RISC-V Physical Memory Protection (PMP) Implementation + * + * Provides hardware-enforced memory isolation using PMP in TOR mode. + */ + +#include +#include +#include + +#include "csr.h" +#include "pmp.h" +#include "private/error.h" + +/* PMP CSR Access Helpers + * + * RISC-V CSR instructions require compile-time constant addresses encoded in + * the instruction itself. These helpers use switch-case dispatch to provide + * runtime indexed access to PMP configuration and address registers. 
+ * + * - pmpcfg0-3: Four 32-bit configuration registers (16 regions, 8 bits each) + * - pmpaddr0-15: Sixteen address registers for TOR (Top-of-Range) mode + */ + +/* Read PMP configuration register by index (0-3) */ +static uint32_t read_pmpcfg(uint8_t idx) +{ + switch (idx) { + case 0: + return read_csr_num(CSR_PMPCFG0); + case 1: + return read_csr_num(CSR_PMPCFG1); + case 2: + return read_csr_num(CSR_PMPCFG2); + case 3: + return read_csr_num(CSR_PMPCFG3); + default: + return 0; + } +} + +/* Write PMP configuration register by index (0-3) */ +static void write_pmpcfg(uint8_t idx, uint32_t val) +{ + switch (idx) { + case 0: + write_csr_num(CSR_PMPCFG0, val); + break; + case 1: + write_csr_num(CSR_PMPCFG1, val); + break; + case 2: + write_csr_num(CSR_PMPCFG2, val); + break; + case 3: + write_csr_num(CSR_PMPCFG3, val); + break; + } +} + +/* Read PMP address register by index (0-15) */ +static uint32_t __attribute__((unused)) read_pmpaddr(uint8_t idx) +{ + switch (idx) { + case 0: + return read_csr_num(CSR_PMPADDR0); + case 1: + return read_csr_num(CSR_PMPADDR1); + case 2: + return read_csr_num(CSR_PMPADDR2); + case 3: + return read_csr_num(CSR_PMPADDR3); + case 4: + return read_csr_num(CSR_PMPADDR4); + case 5: + return read_csr_num(CSR_PMPADDR5); + case 6: + return read_csr_num(CSR_PMPADDR6); + case 7: + return read_csr_num(CSR_PMPADDR7); + case 8: + return read_csr_num(CSR_PMPADDR8); + case 9: + return read_csr_num(CSR_PMPADDR9); + case 10: + return read_csr_num(CSR_PMPADDR10); + case 11: + return read_csr_num(CSR_PMPADDR11); + case 12: + return read_csr_num(CSR_PMPADDR12); + case 13: + return read_csr_num(CSR_PMPADDR13); + case 14: + return read_csr_num(CSR_PMPADDR14); + case 15: + return read_csr_num(CSR_PMPADDR15); + default: + return 0; + } +} + +/* Write PMP address register by index (0-15) */ +static void write_pmpaddr(uint8_t idx, uint32_t val) +{ + switch (idx) { + case 0: + write_csr_num(CSR_PMPADDR0, val); + break; + case 1: + write_csr_num(CSR_PMPADDR1, val); + break; + case 2: + write_csr_num(CSR_PMPADDR2, val); + break; + case 3: + write_csr_num(CSR_PMPADDR3, val); + break; + case 4: + write_csr_num(CSR_PMPADDR4, val); + break; + case 5: + write_csr_num(CSR_PMPADDR5, val); + break; + case 6: + write_csr_num(CSR_PMPADDR6, val); + break; + case 7: + write_csr_num(CSR_PMPADDR7, val); + break; + case 8: + write_csr_num(CSR_PMPADDR8, val); + break; + case 9: + write_csr_num(CSR_PMPADDR9, val); + break; + case 10: + write_csr_num(CSR_PMPADDR10, val); + break; + case 11: + write_csr_num(CSR_PMPADDR11, val); + break; + case 12: + write_csr_num(CSR_PMPADDR12, val); + break; + case 13: + write_csr_num(CSR_PMPADDR13, val); + break; + case 14: + write_csr_num(CSR_PMPADDR14, val); + break; + case 15: + write_csr_num(CSR_PMPADDR15, val); + break; + } +} + +/* Static Memory Pools for Boot-time PMP Initialization + * + * Defines kernel memory regions protected at boot. Each pool specifies + * a memory range and access permissions. 
+ */ +static const mempool_t kernel_mempools[] = { + DECLARE_MEMPOOL("kernel_text", + &_stext, + &_etext, + PMPCFG_PERM_RX, + PMP_PRIORITY_KERNEL), + DECLARE_MEMPOOL("kernel_data", + &_sdata, + &_edata, + PMPCFG_PERM_RW, + PMP_PRIORITY_KERNEL), + DECLARE_MEMPOOL("kernel_bss", + &_sbss, + &_ebss, + PMPCFG_PERM_RW, + PMP_PRIORITY_KERNEL), +}; + +#define KERNEL_MEMPOOL_COUNT \ + (sizeof(kernel_mempools) / sizeof(kernel_mempools[0])) + +/* Global PMP configuration (shadow of hardware state) */ +static pmp_config_t pmp_global_config; + +/* Helper to compute pmpcfg register index and bit offset for a given region */ +static inline void pmp_get_cfg_indices(uint8_t region_idx, + uint8_t *cfg_idx, + uint8_t *cfg_offset) +{ + *cfg_idx = region_idx / 4; + *cfg_offset = (region_idx % 4) * 8; +} + +pmp_config_t *pmp_get_config(void) +{ + return &pmp_global_config; +} + +int32_t pmp_init(pmp_config_t *config) +{ + if (!config) + return ERR_PMP_INVALID_REGION; + + /* Clear all PMP regions in hardware and shadow configuration */ + for (uint8_t i = 0; i < PMP_MAX_REGIONS; i++) { + write_pmpaddr(i, 0); + if (i % 4 == 0) + write_pmpcfg(i / 4, 0); + + config->regions[i].addr_start = 0; + config->regions[i].addr_end = 0; + config->regions[i].permissions = 0; + config->regions[i].priority = PMP_PRIORITY_TEMPORARY; + config->regions[i].region_id = i; + config->regions[i].locked = 0; + } + + config->region_count = 0; + config->next_region_idx = 0; + config->initialized = 1; + + return ERR_OK; +} +int32_t pmp_init_pools(pmp_config_t *config, + const mempool_t *pools, + size_t count) +{ + if (!config || !pools || count == 0) + return ERR_PMP_INVALID_REGION; + + /* Initialize PMP hardware and state */ + int32_t ret = pmp_init(config); + if (ret < 0) + return ret; + + /* Configure each memory pool as a PMP region */ + for (size_t i = 0; i < count; i++) { + const mempool_t *pool = &pools[i]; + + /* Validate pool boundaries */ + if (pool->start >= pool->end) + return ERR_PMP_ADDR_RANGE; + + /* Prepare PMP region configuration */ + pmp_region_t region = { + .addr_start = pool->start, + .addr_end = pool->end, + .permissions = pool->flags & (PMPCFG_R | PMPCFG_W | PMPCFG_X), + .priority = pool->tag, + .region_id = i, + .locked = (pool->tag == PMP_PRIORITY_KERNEL), + }; + + /* Configure the PMP region */ + ret = pmp_set_region(config, ®ion); + if (ret < 0) + return ret; + } + + return ERR_OK; +} + +int32_t pmp_init_kernel(pmp_config_t *config) +{ + return pmp_init_pools(config, kernel_mempools, KERNEL_MEMPOOL_COUNT); +} + +int32_t pmp_set_region(pmp_config_t *config, const pmp_region_t *region) +{ + if (!config || !region) + return ERR_PMP_INVALID_REGION; + + /* Validate region index is within bounds */ + if (region->region_id >= PMP_MAX_REGIONS) + return ERR_PMP_INVALID_REGION; + + /* Validate address range */ + if (region->addr_start >= region->addr_end) + return ERR_PMP_ADDR_RANGE; + + /* Check if region is already locked */ + if (config->regions[region->region_id].locked) + return ERR_PMP_LOCKED; + + uint8_t region_idx = region->region_id; + uint8_t pmpcfg_idx, pmpcfg_offset; + pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset); + + /* Build configuration byte with TOR mode and permissions */ + uint8_t pmpcfg_perm = + region->permissions & (PMPCFG_R | PMPCFG_W | PMPCFG_X); + uint8_t pmpcfg_byte = PMPCFG_A_TOR | pmpcfg_perm; + + /* Read current pmpcfg register to preserve other regions */ + uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx); + + /* Clear the configuration byte for this region */ + pmpcfg_val &= 
~(0xFFU << pmpcfg_offset); + + /* Write new configuration byte */ + pmpcfg_val |= (pmpcfg_byte << pmpcfg_offset); + + /* Write pmpaddr register with the upper boundary */ + write_pmpaddr(region_idx, region->addr_end >> 2); + + /* Write pmpcfg register with updated configuration */ + write_pmpcfg(pmpcfg_idx, pmpcfg_val); + + /* Update shadow configuration */ + config->regions[region_idx].addr_start = region->addr_start; + config->regions[region_idx].addr_end = region->addr_end; + config->regions[region_idx].permissions = region->permissions; + config->regions[region_idx].priority = region->priority; + config->regions[region_idx].region_id = region_idx; + config->regions[region_idx].locked = region->locked; + + /* Update region count if this is a newly used region */ + if (region_idx >= config->region_count) + config->region_count = region_idx + 1; + + return ERR_OK; +} + +int32_t pmp_disable_region(pmp_config_t *config, uint8_t region_idx) +{ + if (!config) + return ERR_PMP_INVALID_REGION; + + /* Validate region index is within bounds */ + if (region_idx >= PMP_MAX_REGIONS) + return ERR_PMP_INVALID_REGION; + + /* Check if region is already locked */ + if (config->regions[region_idx].locked) + return ERR_PMP_LOCKED; + + uint8_t pmpcfg_idx, pmpcfg_offset; + pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset); + + /* Read current pmpcfg register to preserve other regions */ + uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx); + + /* Clear the configuration byte for this region (disables it) */ + pmpcfg_val &= ~(0xFFU << pmpcfg_offset); + + /* Write pmpcfg register with updated configuration */ + write_pmpcfg(pmpcfg_idx, pmpcfg_val); + + /* Update shadow configuration */ + config->regions[region_idx].addr_start = 0; + config->regions[region_idx].addr_end = 0; + config->regions[region_idx].permissions = 0; + + return ERR_OK; +} + +int32_t pmp_lock_region(pmp_config_t *config, uint8_t region_idx) +{ + if (!config) + return ERR_PMP_INVALID_REGION; + + /* Validate region index is within bounds */ + if (region_idx >= PMP_MAX_REGIONS) + return ERR_PMP_INVALID_REGION; + + uint8_t pmpcfg_idx, pmpcfg_offset; + pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset); + + /* Read current pmpcfg register to preserve other regions */ + uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx); + + /* Get current configuration byte for this region */ + uint8_t pmpcfg_byte = (pmpcfg_val >> pmpcfg_offset) & 0xFFU; + + /* Set lock bit */ + pmpcfg_byte |= PMPCFG_L; + + /* Clear the configuration byte for this region */ + pmpcfg_val &= ~(0xFFU << pmpcfg_offset); + + /* Write new configuration byte with lock bit set */ + pmpcfg_val |= (pmpcfg_byte << pmpcfg_offset); + + /* Write pmpcfg register with updated configuration */ + write_pmpcfg(pmpcfg_idx, pmpcfg_val); + + /* Update shadow configuration */ + config->regions[region_idx].locked = 1; + + return ERR_OK; +} + +int32_t pmp_get_region(const pmp_config_t *config, + uint8_t region_idx, + pmp_region_t *region) +{ + if (!config || !region) + return ERR_PMP_INVALID_REGION; + + /* Validate region index is within bounds */ + if (region_idx >= PMP_MAX_REGIONS) + return ERR_PMP_INVALID_REGION; + + uint8_t pmpcfg_idx, pmpcfg_offset; + pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset); + + /* Read the address and configuration from shadow configuration */ + region->addr_start = config->regions[region_idx].addr_start; + region->addr_end = config->regions[region_idx].addr_end; + region->permissions = config->regions[region_idx].permissions; + region->priority = 
config->regions[region_idx].priority; + region->region_id = region_idx; + region->locked = config->regions[region_idx].locked; + + return ERR_OK; +} + +int32_t pmp_check_access(const pmp_config_t *config, + uint32_t addr, + uint32_t size, + uint8_t is_write, + uint8_t is_execute) +{ + if (!config) + return ERR_PMP_INVALID_REGION; + + /* Reject overflow to prevent security bypass */ + if (addr > UINT32_MAX - size) + return 0; + + uint32_t access_end = addr + size; + + /* In TOR mode, check all regions in priority order */ + for (uint8_t i = 0; i < config->region_count; i++) { + const pmp_region_t *region = &config->regions[i]; + + /* Skip disabled regions */ + if (region->addr_start == 0 && region->addr_end == 0) + continue; + + /* Check if access falls within this region */ + if (addr >= region->addr_start && access_end <= region->addr_end) { + /* Verify permissions match access type */ + uint8_t required_perm = 0; + if (is_write) + required_perm |= PMPCFG_W; + if (is_execute) + required_perm |= PMPCFG_X; + if (!is_write && !is_execute) + required_perm = PMPCFG_R; + + if ((region->permissions & required_perm) == required_perm) + return 1; /* Access allowed */ + else + return 0; /* Access denied */ + } + } + + /* Access not covered by any region */ + return 0; +} + +/* Selects victim flexpage for eviction using priority-based algorithm. + * + * @mspace : Pointer to memory space + * Returns pointer to victim flexpage, or NULL if no evictable page found. + */ +static fpage_t *select_victim_fpage(memspace_t *mspace) +{ + if (!mspace) + return NULL; + + fpage_t *victim = NULL; + uint32_t lowest_prio = 0; + + /* Select page with highest priority value (lowest priority). + * Kernel regions (priority 0) are never selected. */ + for (fpage_t *fp = mspace->pmp_first; fp; fp = fp->pmp_next) { + if (fp->priority > lowest_prio) { + victim = fp; + lowest_prio = fp->priority; + } + } + + return victim; +} + +/* Sets base address for a TOR paired region entry */ +static void pmp_set_base_entry(uint8_t entry_idx, uint32_t base_addr) +{ + if (entry_idx >= PMP_MAX_REGIONS) + return; + + write_pmpaddr(entry_idx, base_addr >> 2); +} + +/* Loads a flexpage into a PMP hardware region */ +int32_t pmp_load_fpage(fpage_t *fpage, uint8_t region_idx) +{ + if (!fpage || region_idx >= PMP_MAX_REGIONS) + return -1; + + pmp_config_t *config = pmp_get_config(); + if (!config) + return -1; + + uint32_t base = fpage->base; + uint32_t size = fpage->size; + uint32_t end = base + size; + + /* User regions use paired entries (base + top), kernel regions use single + * entry */ + if (PMP_IS_USER_REGION(region_idx)) { + uint8_t base_entry = PMP_USER_BASE_ENTRY(region_idx); + uint8_t top_entry = PMP_USER_TOP_ENTRY(region_idx); + + if (top_entry >= PMP_MAX_REGIONS) { + return -1; + } + + /* Set base entry (address-only, pmpcfg=0) */ + pmp_set_base_entry(base_entry, base); + config->regions[base_entry].addr_start = base; + config->regions[base_entry].addr_end = base; + config->regions[base_entry].permissions = 0; + config->regions[base_entry].locked = 0; + + /* Set top entry (TOR mode with permissions) */ + pmp_region_t top_region = { + .addr_start = base, + .addr_end = end, + .permissions = fpage->rwx, + .priority = fpage->priority, + .region_id = top_entry, + .locked = 0, + }; + + int32_t ret = pmp_set_region(config, &top_region); + if (ret < 0) + return ret; + + fpage->pmp_id = base_entry; + + } else { + /* Kernel region: single entry TOR mode */ + pmp_region_t region = { + .addr_start = base, + .addr_end = end, + .permissions = 
fpage->rwx, + .priority = fpage->priority, + .region_id = region_idx, + .locked = 0, + }; + + int32_t ret = pmp_set_region(config, ®ion); + if (ret < 0) + return ret; + + fpage->pmp_id = region_idx; + } + + return 0; +} + +/* Evicts a flexpage from its PMP hardware region */ +int32_t pmp_evict_fpage(fpage_t *fpage) +{ + if (!fpage) + return -1; + + /* Only evict if actually loaded into PMP */ + if (fpage->pmp_id == PMP_INVALID_REGION) + return 0; + + pmp_config_t *config = pmp_get_config(); + if (!config) + return -1; + + uint8_t region_idx = fpage->pmp_id; + + /* User regions need to clear both base and top entries */ + if (PMP_IS_USER_REGION(region_idx)) { + uint8_t base_entry = PMP_USER_BASE_ENTRY(region_idx); + uint8_t top_entry = PMP_USER_TOP_ENTRY(region_idx); + + /* Clear base entry (address and shadow config) */ + pmp_set_base_entry(base_entry, 0); + config->regions[base_entry].addr_start = 0; + config->regions[base_entry].addr_end = 0; + config->regions[base_entry].permissions = 0; + + /* Clear top entry using existing pmp_disable_region() */ + int32_t ret = pmp_disable_region(config, top_entry); + if (ret < 0) + return ret; + + } else { + /* Kernel region uses simple single-entry eviction */ + int32_t ret = pmp_disable_region(config, region_idx); + if (ret < 0) + return ret; + } + + fpage->pmp_id = PMP_INVALID_REGION; + return 0; +} + +/* Atomically replaces a victim flexpage with a target flexpage in hardware. + * + * Captures victim's PMP ID before eviction to avoid use-after-invalidation. + * + * @victim : Flexpage to evict (must be currently loaded) + * @target : Flexpage to load (must not be currently loaded) + * Returns 0 on success, negative error code on failure. + */ +static int32_t replace_fpage(fpage_t *victim, fpage_t *target) +{ + if (!victim || !target) + return -1; + + /* Capture region ID before eviction invalidates it */ + uint8_t region_idx = victim->pmp_id; + + /* Evict victim from hardware */ + int32_t ret = pmp_evict_fpage(victim); + if (ret != 0) + return ret; + + /* Load target into the freed slot */ + return pmp_load_fpage(target, region_idx); +} + +/* Handles PMP access faults by loading the required flexpage into hardware. 
*/ +int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write) +{ + if (!kcb || !kcb->task_current || !kcb->task_current->data) + return PMP_FAULT_UNHANDLED; + + tcb_t *current = (tcb_t *) kcb->task_current->data; + memspace_t *mspace = current->mspace; + if (!mspace) + return PMP_FAULT_UNHANDLED; + + /* Find flexpage containing faulting address */ + fpage_t *target_fpage = NULL; + for (fpage_t *fp = mspace->first; fp; fp = fp->as_next) { + if (fault_addr >= fp->base && fault_addr < (fp->base + fp->size)) { + target_fpage = fp; + break; + } + } + + /* Cannot recover: address not in task's memory space or already loaded */ + if (!target_fpage || target_fpage->pmp_id != PMP_INVALID_REGION) { + /* Mark task as zombie for deferred cleanup */ + current->state = TASK_ZOMBIE; + return PMP_FAULT_TERMINATE; + } + + pmp_config_t *config = pmp_get_config(); + if (!config) + return PMP_FAULT_UNHANDLED; + + /* Load into available region or evict victim */ + if (config->next_region_idx < PMP_MAX_REGIONS) + return pmp_load_fpage(target_fpage, config->next_region_idx); + + fpage_t *victim = select_victim_fpage(mspace); + if (!victim) + return PMP_FAULT_UNHANDLED; + + /* Use helper to safely replace victim with target */ + return replace_fpage(victim, target_fpage); +} +/* Finds next available PMP region slot + * + * User regions require two consecutive free entries. + * Kernel regions require single entry. + * + * Returns region index on success, -1 if none available. + */ +static int8_t find_free_region_slot(const pmp_config_t *config) +{ + if (!config) + return -1; + + for (uint8_t i = 0; i < PMP_MAX_REGIONS; i++) { + /* Skip locked regions */ + if (config->regions[i].locked) + continue; + + bool is_free = (config->regions[i].addr_start == 0 && + config->regions[i].addr_end == 0); + + if (!is_free) + continue; + + /* Kernel regions use single entry */ + if (i < PMP_USER_REGION_START) + return i; + + /* User regions need two consecutive slots */ + if (i + 1 < PMP_MAX_REGIONS) { + bool next_is_free = (config->regions[i + 1].addr_start == 0 && + config->regions[i + 1].addr_end == 0); + bool next_not_locked = !config->regions[i + 1].locked; + + if (next_is_free && next_not_locked) + return i; + } + } + + return -1; +} + +int32_t pmp_switch_context(memspace_t *old_mspace, memspace_t *new_mspace) +{ + if (old_mspace == new_mspace) { + return 0; + } + + pmp_config_t *config = pmp_get_config(); + if (!config) { + return -1; + } + + /* Evict old task's dynamic regions */ + if (old_mspace) { + for (fpage_t *fp = old_mspace->pmp_first; fp; fp = fp->pmp_next) { + /* pmp_evict_fpage correctly handles paired entries */ + if (fp->pmp_id != PMP_INVALID_REGION) { + pmp_evict_fpage(fp); + } + } + } + + /* Load new task's regions and rebuild tracking list */ + if (new_mspace) { + new_mspace->pmp_first = NULL; + + for (fpage_t *fp = new_mspace->first; fp; fp = fp->as_next) { + /* Shared regions may already be loaded */ + if (fp->pmp_id != PMP_INVALID_REGION) { + fp->pmp_next = new_mspace->pmp_first; + new_mspace->pmp_first = fp; + continue; + } + + int32_t region_idx = find_free_region_slot(config); + if (region_idx < 0) + break; + + if (pmp_load_fpage(fp, (uint8_t) region_idx) != 0) + break; + + fp->pmp_next = new_mspace->pmp_first; + new_mspace->pmp_first = fp; + } + } + + return 0; +} diff --git a/arch/riscv/pmp.h b/arch/riscv/pmp.h new file mode 100644 index 00000000..89c066f0 --- /dev/null +++ b/arch/riscv/pmp.h @@ -0,0 +1,186 @@ +/* RISC-V Physical Memory Protection (PMP) Hardware Layer + * + * Low-level 
interface to RISC-V PMP using TOR (Top-of-Range) mode for + * flexible region management without alignment constraints. + */ + +#pragma once + +#include +#include +#include + +#include "csr.h" + +/* PMP Region Priority Levels (lower value = higher priority) + * + * Used for eviction decisions when hardware PMP regions are exhausted. + */ +typedef enum { + PMP_PRIORITY_KERNEL = 0, + PMP_PRIORITY_STACK = 1, + PMP_PRIORITY_SHARED = 2, + PMP_PRIORITY_TEMPORARY = 3, + PMP_PRIORITY_COUNT = 4 +} pmp_priority_t; + +/* PMP TOR Mode Entry Layout + * + * Kernel regions (0-2) use single entries configured at boot. + * User dynamic regions (3+) use paired entries for flexible boundaries: + * - Base entry: Lower bound address + * - Top entry: Upper bound address with permissions + * Paired entries enable non-contiguous regions without NAPOT alignment. + */ +#define PMP_KERNEL_REGIONS 3 /* Regions 0-2 for kernel */ +#define PMP_USER_REGION_START 3 /* User regions start from 3 */ +#define PMP_ENTRIES_PER_USER 2 /* Each user region uses 2 entries */ +#define PMP_MAX_USER_REGIONS \ + ((PMP_MAX_REGIONS - PMP_USER_REGION_START) / PMP_ENTRIES_PER_USER) + +/* Invalid region marker (fpage not loaded into any PMP region) */ +#define PMP_INVALID_REGION 0xFF + +/* Check if a region index is a user region requiring paired entries */ +#define PMP_IS_USER_REGION(idx) ((idx) >= PMP_USER_REGION_START) + +/* Convert user region index to hardware entry pair */ +#define PMP_USER_BASE_ENTRY(idx) (idx) +#define PMP_USER_TOP_ENTRY(idx) ((idx) + 1) + +/* PMP Region Configuration */ +typedef struct { + uint32_t addr_start; /* Start address (inclusive) */ + uint32_t addr_end; /* End address (exclusive, written to pmpaddr) */ + uint8_t permissions; /* R/W/X bits (PMPCFG_R | PMPCFG_W | PMPCFG_X) */ + pmp_priority_t priority; /* Eviction priority */ + uint8_t region_id; /* Hardware region index (0-15) */ + uint8_t locked; /* Lock bit (cannot modify until reset) */ +} pmp_region_t; + +/* PMP Global State */ +typedef struct { + pmp_region_t regions[PMP_MAX_REGIONS]; /* Shadow of hardware config */ + uint8_t region_count; /* Active region count */ + uint8_t next_region_idx; /* Next free region index */ + uint32_t initialized; /* Initialization flag */ +} pmp_config_t; + +/* PMP Management Functions */ + +/* Returns pointer to global PMP configuration */ +pmp_config_t *pmp_get_config(void); + +/* Initializes the PMP hardware and configuration state. + * @config : Pointer to pmp_config_t structure to be initialized. + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_init(pmp_config_t *config); + +/* Configures a single PMP region in TOR mode. + * @config : Pointer to PMP configuration state + * @region : Pointer to pmp_region_t structure with desired configuration + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_set_region(pmp_config_t *config, const pmp_region_t *region); + +/* Reads the current configuration of a PMP region. + * @config : Pointer to PMP configuration state + * @region_idx : Index of the region to read (0-15) + * @region : Pointer to pmp_region_t to store the result + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_get_region(const pmp_config_t *config, + uint8_t region_idx, + pmp_region_t *region); + +/* Disables a PMP region. + * @config : Pointer to PMP configuration state + * @region_idx : Index of the region to disable (0-15) + * Returns 0 on success, or negative error code on failure. 
+ */ +int32_t pmp_disable_region(pmp_config_t *config, uint8_t region_idx); + +/* Locks a PMP region to prevent further modification. + * @config : Pointer to PMP configuration state + * @region_idx : Index of the region to lock (0-15) + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_lock_region(pmp_config_t *config, uint8_t region_idx); + +/* Verifies that a memory access is allowed by the current PMP configuration. + * @config : Pointer to PMP configuration state + * @addr : Address to check + * @size : Size of the access in bytes + * @is_write : 1 for write access, 0 for read access + * @is_execute : 1 for execute access, 0 for data access + * Returns 1 if access is allowed, 0 if denied, or negative error code. + */ +int32_t pmp_check_access(const pmp_config_t *config, + uint32_t addr, + uint32_t size, + uint8_t is_write, + uint8_t is_execute); + +/* Memory Pool Management Functions */ + +/* Initializes PMP regions from an array of memory pool descriptors. + * @config : Pointer to PMP configuration state + * @pools : Array of memory pool descriptors + * @count : Number of pools in the array + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_init_pools(pmp_config_t *config, + const mempool_t *pools, + size_t count); + +/* Initializes PMP with default kernel memory pools. + * @config : Pointer to PMP configuration state + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_init_kernel(pmp_config_t *config); + +/* Flexpage Hardware Loading Functions */ + +/* Loads a flexpage into a PMP hardware region. + * @fpage : Pointer to flexpage to load + * @region_idx : Hardware PMP region index (0-15) + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_load_fpage(fpage_t *fpage, uint8_t region_idx); + +/* Evicts a flexpage from its PMP hardware region. + * @fpage : Pointer to flexpage to evict + * Returns 0 on success, or negative error code on failure. + */ +int32_t pmp_evict_fpage(fpage_t *fpage); + +/* PMP Fault Handler Return Codes */ +#define PMP_FAULT_RECOVERED 0 /* Fault recovered, resume execution */ +#define PMP_FAULT_UNHANDLED (-1) /* Cannot recover, fall through to default */ +#define PMP_FAULT_TERMINATE \ + (-2) /* Task terminated, caller invokes dispatcher */ + +/* Handles PMP access violations. + * + * Attempts to recover from PMP access faults by loading the required memory + * region into a hardware PMP region. If all 16 regions are in use, selects a + * victim for eviction and reuses its region. + * + * @fault_addr : The faulting memory address (from mtval CSR) + * @is_write : 1 for store/AMO access, 0 for load + * Returns PMP_FAULT_RECOVERED, PMP_FAULT_UNHANDLED, or PMP_FAULT_TERMINATE. + */ +int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write); + +/* Switches PMP configuration during task context switch. + * + * Evicts the old task's dynamic regions from hardware and loads the new + * task's regions into available PMP slots. Kernel regions marked as locked + * are preserved across all context switches. + * + * @old_mspace : Memory space of task being switched out (can be NULL) + * @new_mspace : Memory space of task being switched in (can be NULL) + * Returns 0 on success, negative error code on failure. 
+ */ +int32_t pmp_switch_context(memspace_t *old_mspace, memspace_t *new_mspace); diff --git a/include/linmo.h b/include/linmo.h index 2c7fcda3..9f12b36c 100644 --- a/include/linmo.h +++ b/include/linmo.h @@ -54,6 +54,7 @@ #include #include +#include #include #include #include diff --git a/include/private/error.h b/include/private/error.h index fc1646c8..7c706157 100644 --- a/include/private/error.h +++ b/include/private/error.h @@ -34,6 +34,14 @@ enum { ERR_STACK_CHECK, /* Stack overflow or corruption detected */ ERR_HEAP_CORRUPT, /* Heap corruption or invalid free detected */ + /* PMP Configuration Errors */ + ERR_PMP_INVALID_REGION, /* Invalid PMP region parameters */ + ERR_PMP_NO_REGIONS, /* No free PMP regions available */ + ERR_PMP_LOCKED, /* Region is locked by higher priority */ + ERR_PMP_SIZE_MISMATCH, /* Size doesn't meet alignment requirements */ + ERR_PMP_ADDR_RANGE, /* Address range is invalid */ + ERR_PMP_NOT_INIT, /* PMP not initialized */ + /* IPC and Synchronization Errors */ ERR_PIPE_ALLOC, /* Pipe allocation failed */ ERR_PIPE_DEALLOC, /* Pipe deallocation failed */ diff --git a/include/sys/memprot.h b/include/sys/memprot.h new file mode 100644 index 00000000..ff202ba8 --- /dev/null +++ b/include/sys/memprot.h @@ -0,0 +1,134 @@ +/* Memory Protection Abstractions + * + * Software abstractions for managing memory protection at different + * granularities. These structures build upon hardware protection + * mechanisms (such as RISC-V PMP) to provide flexible, architecture- + * independent memory isolation. + */ + +#pragma once + +#include + +/* Forward declarations */ +struct fpage; +struct as; + +/* Flexpage + * + * Contiguous physical memory region with hardware-enforced protection. + * Supports arbitrary base addresses and sizes without alignment constraints. + */ +typedef struct fpage { + struct fpage *as_next; /* Next in address space list */ + struct fpage *map_next; /* Next in mapping chain */ + struct fpage *pmp_next; /* Next in PMP queue */ + uint32_t base; /* Physical base address */ + uint32_t size; /* Region size */ + uint32_t rwx; /* R/W/X permission bits */ + uint32_t pmp_id; /* PMP region index */ + uint32_t flags; /* Status flags */ + uint32_t priority; /* Eviction priority */ + int used; /* Usage counter */ +} fpage_t; + +/* Memory Space + * + * Collection of flexpages forming a task's memory view. Can be shared + * across multiple tasks. + */ +typedef struct memspace { + uint32_t as_id; /* Memory space identifier */ + struct fpage *first; /* Head of flexpage list */ + struct fpage *pmp_first; /* Head of PMP-loaded list */ + struct fpage *pmp_stack; /* Stack regions */ + uint32_t shared; /* Shared flag */ +} memspace_t; + +/* Memory Pool + * + * Static memory region descriptor for boot-time PMP initialization. + */ +typedef struct { + const char *name; /* Pool name */ + uintptr_t start; /* Start address */ + uintptr_t end; /* End address */ + uint32_t flags; /* Access permissions */ + uint32_t tag; /* Pool type/priority */ +} mempool_t; + +/* Memory Pool Declaration Helpers + * + * Simplifies memory pool initialization with designated initializers. + * DECLARE_MEMPOOL_FROM_SYMBOLS uses token concatenation to construct + * linker symbol names automatically. 
+ */ +#define DECLARE_MEMPOOL(name_, start_, end_, flags_, tag_) \ + { \ + .name = (name_), .start = (uintptr_t) (start_), \ + .end = (uintptr_t) (end_), .flags = (flags_), .tag = (tag_), \ + } + +#define DECLARE_MEMPOOL_FROM_SYMBOLS(name_, sym_base_, flags_, tag_) \ + DECLARE_MEMPOOL((name_), &(sym_base_##_start), &(sym_base_##_end), \ + (flags_), (tag_)) + +/* Flexpage Management Functions */ + +/* Creates and initializes a new flexpage. + * @base : Physical base address + * @size : Size in bytes + * @rwx : Permission bits + * @priority : Eviction priority + * Returns pointer to created flexpage, or NULL on failure. + */ +fpage_t *mo_fpage_create(uint32_t base, + uint32_t size, + uint32_t rwx, + uint32_t priority); + +/* Destroys a flexpage. + * @fpage : Pointer to flexpage to destroy + */ +void mo_fpage_destroy(fpage_t *fpage); + +/* Memory Space Management Functions */ + +/* Creates and initializes a memory space. + * @as_id : Memory space identifier + * @shared : Whether this space can be shared across tasks + * Returns pointer to created memory space, or NULL on failure. + */ +memspace_t *mo_memspace_create(uint32_t as_id, uint32_t shared); + +/* Destroys a memory space and all its flexpages. + * @mspace : Pointer to memory space to destroy + */ +void mo_memspace_destroy(memspace_t *mspace); + +/* Flexpage Hardware Loading Functions */ + +/* Loads a flexpage into a hardware region. + * @fpage : Pointer to flexpage to load + * @region_idx : Hardware region index (0-15) + * Returns 0 on success, or negative error code on failure. + */ +int32_t mo_load_fpage(fpage_t *fpage, uint8_t region_idx); + +/* Evicts a flexpage from its hardware region. + * @fpage : Pointer to flexpage to evict + * Returns 0 on success, or negative error code on failure. + */ +int32_t mo_evict_fpage(fpage_t *fpage); + +/* Handles memory access faults. + * + * Attempts to recover from access faults by loading the required memory + * region into a hardware region. If all regions are in use, selects a + * victim for eviction and reuses its region. + * + * @fault_addr : The faulting memory address + * @is_write : 1 for write access, 0 for read access + * Returns 0 on successful recovery, negative error code on failure. 
+ */ +int32_t mo_handle_access_fault(uint32_t fault_addr, uint8_t is_write); diff --git a/include/sys/task.h b/include/sys/task.h index 4421e8b1..02942135 100644 --- a/include/sys/task.h +++ b/include/sys/task.h @@ -37,11 +37,12 @@ enum task_priorities { /* Task Lifecycle States */ enum task_states { - TASK_STOPPED, /* Task created but not yet scheduled */ - TASK_READY, /* Task in ready state, waiting to be scheduled */ - TASK_RUNNING, /* Task currently executing on CPU */ - TASK_BLOCKED, /* Task waiting for delay timer to expire */ - TASK_SUSPENDED /* Task paused/excluded from scheduling until resumed */ + TASK_STOPPED, /* Task created but not yet scheduled */ + TASK_READY, /* Task in ready state, waiting to be scheduled */ + TASK_RUNNING, /* Task currently executing on CPU */ + TASK_BLOCKED, /* Task waiting for delay timer to expire */ + TASK_SUSPENDED, /* Task paused/excluded from scheduling until resumed */ + TASK_ZOMBIE /* Task terminated, awaiting resource cleanup */ }; /* Task Privilege Mode */ @@ -65,6 +66,9 @@ typedef enum { #define TASK_TIMESLICE_LOW 10 /* Low priority: longer slice */ #define TASK_TIMESLICE_IDLE 15 /* Idle tasks: longest slice */ +/* Forward declaration */ +struct memspace; + /* Task Control Block (TCB) * * Contains all essential information about a single task, including saved @@ -82,6 +86,9 @@ typedef struct tcb { void *kernel_stack; /* Base address of kernel stack (NULL for M-mode) */ size_t kernel_stack_size; /* Size of kernel stack in bytes (0 for M-mode) */ + /* Memory Protection */ + struct memspace *mspace; /* Memory space for task isolation */ + /* Scheduling Parameters */ uint16_t prio; /* Encoded priority (base and time slice counter) */ uint8_t prio_level; /* Priority level (0-7, 0 = highest) */ diff --git a/kernel/memprot.c b/kernel/memprot.c new file mode 100644 index 00000000..cbbd51bb --- /dev/null +++ b/kernel/memprot.c @@ -0,0 +1,99 @@ +/* Memory Protection Management + * + * Provides allocation and management functions for flexpages, which are + * software abstractions representing contiguous physical memory regions with + * hardware-enforced protection attributes. 
+ */ + +#include +#include +#include +#include + +/* Creates and initializes a flexpage */ +fpage_t *mo_fpage_create(uint32_t base, + uint32_t size, + uint32_t rwx, + uint32_t priority) +{ + fpage_t *fpage = malloc(sizeof(fpage_t)); + if (!fpage) + return NULL; + + /* Initialize all fields */ + fpage->as_next = NULL; + fpage->map_next = NULL; + fpage->pmp_next = NULL; + fpage->base = base; + fpage->size = size; + fpage->rwx = rwx; + fpage->pmp_id = PMP_INVALID_REGION; /* Not loaded into PMP initially */ + fpage->flags = 0; /* No flags set initially */ + fpage->priority = priority; + fpage->used = 0; /* Not in use initially */ + + return fpage; +} + +/* Destroys a flexpage */ +void mo_fpage_destroy(fpage_t *fpage) +{ + if (!fpage) + return; + + free(fpage); +} + +/* Loads a flexpage into a hardware region */ +int32_t mo_load_fpage(fpage_t *fpage, uint8_t region_idx) +{ + return pmp_load_fpage(fpage, region_idx); +} + +/* Evicts a flexpage from its hardware region */ +int32_t mo_evict_fpage(fpage_t *fpage) +{ + return pmp_evict_fpage(fpage); +} + +/* Handles memory access faults */ +int32_t mo_handle_access_fault(uint32_t fault_addr, uint8_t is_write) +{ + return pmp_handle_access_fault(fault_addr, is_write); +} + +/* Creates and initializes a memory space */ +memspace_t *mo_memspace_create(uint32_t as_id, uint32_t shared) +{ + memspace_t *mspace = malloc(sizeof(memspace_t)); + if (!mspace) + return NULL; + + mspace->as_id = as_id; + mspace->first = NULL; + mspace->pmp_first = NULL; + mspace->pmp_stack = NULL; + mspace->shared = shared; + + return mspace; +} + +/* Destroys a memory space and all its flexpages */ +void mo_memspace_destroy(memspace_t *mspace) +{ + if (!mspace) + return; + + /* Evict and free all flexpages in the list */ + fpage_t *fp = mspace->first; + while (fp) { + fpage_t *next = fp->as_next; + /* Evict from PMP hardware before freeing to prevent stale references */ + if (fp->pmp_id != PMP_INVALID_REGION) + pmp_evict_fpage(fp); + mo_fpage_destroy(fp); + fp = next; + } + + free(mspace); +} diff --git a/kernel/task.c b/kernel/task.c index 168632db..286e83e0 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "private/error.h" @@ -379,6 +380,46 @@ void yield(void); void _dispatch(void) __attribute__((weak, alias("dispatch"))); void _yield(void) __attribute__((weak, alias("yield"))); +/* Zombie Task Cleanup + * + * Scans the task list for terminated (zombie) tasks and frees their resources. + * Called from dispatcher to ensure cleanup happens in a safe context. + */ +static void task_cleanup_zombies(void) +{ + if (!kcb || !kcb->tasks) + return; + + list_node_t *node = list_next(kcb->tasks->head); + while (node && node != kcb->tasks->tail) { + list_node_t *next = list_next(node); + tcb_t *tcb = node->data; + + if (tcb && tcb->state == TASK_ZOMBIE) { + /* Remove from task list */ + list_remove(kcb->tasks, node); + kcb->task_count--; + + /* Clear from lookup cache */ + for (int i = 0; i < TASK_CACHE_SIZE; i++) { + if (task_cache[i].task == tcb) { + task_cache[i].id = 0; + task_cache[i].task = NULL; + } + } + + /* Free all resources */ + if (tcb->mspace) + mo_memspace_destroy(tcb->mspace); + free(tcb->stack); + if (tcb->kernel_stack) + free(tcb->kernel_stack); + free(tcb); + } + node = next; + } +} + /* Round-Robin Scheduler Implementation * * Implements an efficient round-robin scheduler tweaked for small systems. 
@@ -559,6 +600,9 @@ void dispatch(void) if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data)) panic(ERR_NO_TASKS); + /* Clean up any terminated (zombie) tasks */ + task_cleanup_zombies(); + /* Save current context - only needed for cooperative mode. * In preemptive mode, ISR already saved context to stack, * so we skip this step to avoid interference. @@ -648,6 +692,9 @@ void dispatch(void) next_task->state = TASK_RUNNING; next_task->time_slice = get_priority_timeslice(next_task->prio_level); + /* Switch PMP configuration if tasks have different memory spaces */ + pmp_switch_context(prev_task->mspace, next_task->mspace); + /* Perform context switch based on scheduling mode */ if (kcb->preemptive) { /* Same task - no context switch needed */ @@ -688,15 +735,16 @@ void yield(void) * READY again. */ if (kcb->preemptive) { - /* Trigger one dispatcher call - this will context switch to another - * task. When we return here (after being rescheduled), our delay will - * have expired. + /* Avoid triggering nested traps when already in trap context. + * The dispatcher can be invoked directly since the trap handler + * environment is already established. */ - __asm__ volatile("ecall"); + if (trap_nesting_depth > 0) { + dispatcher(0); + } else { + __asm__ volatile("ecall"); + } - /* After ecall returns, we've been context-switched back, meaning we're - * READY. No need to check state - if we're executing, we're ready. - */ return; } @@ -711,7 +759,15 @@ void yield(void) /* In cooperative mode, delays are only processed on an explicit yield. */ list_foreach(kcb->tasks, delay_update, NULL); + /* Save current task before scheduler modifies task_current */ + tcb_t *prev_task = (tcb_t *) kcb->task_current->data; + sched_select_next_task(); /* Use O(1) priority scheduler */ + + /* Switch PMP configuration if tasks have different memory spaces */ + tcb_t *next_task = (tcb_t *) kcb->task_current->data; + pmp_switch_context(prev_task->mspace, next_task->mspace); + hal_context_restore(((tcb_t *) kcb->task_current->data)->context, 1); } @@ -810,6 +866,38 @@ static int32_t task_spawn_internal(void *task_entry, tcb->kernel_stack_size = 0; } + /* Create memory space for U-mode tasks only. + * M-mode tasks do not require PMP memory protection. + */ + if (tcb->mode) { + tcb->mspace = mo_memspace_create(kcb->next_tid, 0); + if (!tcb->mspace) { + free(tcb->kernel_stack); + free(tcb->stack); + free(tcb); + panic(ERR_TCB_ALLOC); + } + + /* Register stack as flexpage */ + fpage_t *stack_fpage = + mo_fpage_create((uint32_t) tcb->stack, new_stack_size, + PMPCFG_R | PMPCFG_W, PMP_PRIORITY_STACK); + if (!stack_fpage) { + mo_memspace_destroy(tcb->mspace); + free(tcb->kernel_stack); + free(tcb->stack); + free(tcb); + panic(ERR_TCB_ALLOC); + } + + /* Add stack to memory space */ + stack_fpage->as_next = tcb->mspace->first; + tcb->mspace->first = stack_fpage; + tcb->mspace->pmp_stack = stack_fpage; + } else { + tcb->mspace = NULL; + } + /* Add to task list only after all allocations complete */ CRITICAL_ENTER(); @@ -935,6 +1023,8 @@ int32_t mo_task_cancel(uint16_t id) CRITICAL_LEAVE(); /* Free memory outside critical section */ + if (tcb->mspace) + mo_memspace_destroy(tcb->mspace); free(tcb->stack); if (tcb->kernel_stack) free(tcb->kernel_stack);
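As a rough illustration of the paired-entry TOR scheme described in `arch/riscv/pmp.h` (base entry holds the lower bound with its pmpcfg byte left OFF, top entry holds the upper bound with TOR mode plus permissions), the host-side sketch below computes the values a single user flexpage would place in the two entries. `PMPCFG_TOR` and its 0x08 encoding follow the RISC-V privileged spec but are assumptions here, since the project's `csr.h` is not part of this patch; the base address and size are arbitrary.

```c
/* Illustrative only: how one user flexpage maps onto a TOR entry pair. */
#include <stdint.h>
#include <stdio.h>

#define PMPCFG_R 0x01
#define PMPCFG_W 0x02
#define PMPCFG_X 0x04
#define PMPCFG_TOR 0x08 /* A-field = 01 (TOR); assumed to live in csr.h */

int main(void)
{
    uint32_t base = 0x80042000; /* hypothetical task stack base */
    uint32_t size = 0x1000;     /* hypothetical stack size */

    /* Base entry: address-only lower bound, pmpcfg byte stays 0 (OFF). */
    uint32_t pmpaddr_base = base >> 2; /* pmpaddr holds bits [33:2] */
    uint8_t cfg_base = 0;

    /* Top entry: exclusive upper bound with TOR mode and R/W permissions. */
    uint32_t pmpaddr_top = (base + size) >> 2;
    uint8_t cfg_top = PMPCFG_TOR | PMPCFG_R | PMPCFG_W;

    printf("base entry: pmpaddr=0x%08x cfg=0x%02x\n", pmpaddr_base, cfg_base);
    printf("top entry:  pmpaddr=0x%08x cfg=0x%02x\n", pmpaddr_top, cfg_top);
    return 0;
}
```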
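The `DECLARE_MEMPOOL_FROM_SYMBOLS` helper in `include/sys/memprot.h` builds linker-symbol names by token concatenation. A minimal sketch of boot-time pool declaration follows; the linker symbol names (`_text_start`/`_text_end`, `_data_start`/`_data_end`), the include paths, and the call site are assumptions that would need to match the real linker script and boot code.

```c
#include <sys/memprot.h> /* assumed include path for mempool_t and helpers */
#include "pmp.h"         /* assumed path for PMPCFG_* and PMP_PRIORITY_* */

extern uint32_t _text_start, _text_end; /* hypothetical linker symbols */
extern uint32_t _data_start, _data_end;

static const mempool_t boot_pools[] = {
    /* Expands to { .name = "ktext", .start = (uintptr_t) &_text_start,
     *              .end = (uintptr_t) &_text_end, ... } */
    DECLARE_MEMPOOL_FROM_SYMBOLS("ktext", _text, PMPCFG_R | PMPCFG_X,
                                 PMP_PRIORITY_KERNEL),
    DECLARE_MEMPOOL_FROM_SYMBOLS("kdata", _data, PMPCFG_R | PMPCFG_W,
                                 PMP_PRIORITY_KERNEL),
};

/* e.g. during early boot:
 *   pmp_init_pools(pmp_get_config(), boot_pools,
 *                  sizeof(boot_pools) / sizeof(boot_pools[0]));
 */
```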
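The three `PMP_FAULT_*` return codes are meant to be consumed by the trap path, which is not visible in this part of the patch. The sketch below shows one way a load/store access-fault branch might dispatch on them; the wrapper name, the mcause comparison, and `ERR_HARD_FAULT` are assumptions, while `_dispatch()` and `panic()` come from the patched `kernel/task.c`.

```c
/* Sketch only: consuming pmp_handle_access_fault() return codes. */
void handle_access_fault(uint32_t mcause, uint32_t mtval)
{
    /* Standard RISC-V causes: 5 = load access fault, 7 = store/AMO fault. */
    uint8_t is_write = (mcause == 7);

    switch (pmp_handle_access_fault(mtval, is_write)) {
    case PMP_FAULT_RECOVERED:
        /* Required flexpage is now loaded; retry the faulting instruction. */
        return;
    case PMP_FAULT_TERMINATE:
        /* Task was marked TASK_ZOMBIE; schedule another task and let the
         * dispatcher's zombie cleanup reclaim its resources. */
        _dispatch();
        return;
    case PMP_FAULT_UNHANDLED:
    default:
        panic(ERR_HARD_FAULT); /* error code name is an assumption */
    }
}
```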
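Beyond the stack flexpage created in `task_spawn_internal()`, additional regions can be attached to a task's memory space with the same pattern. The helper below is hypothetical (not part of the patch): the buffer, the `ERR_FAIL` code, and the include paths are assumptions, and the list insertion mirrors what `task_spawn_internal()` does for the stack. The region is loaded lazily, on the first faulting access.

```c
#include <sys/memprot.h> /* assumed include paths */
#include <sys/task.h>    /* tcb_t */
#include "pmp.h"         /* PMPCFG_*, PMP_PRIORITY_SHARED */

/* Sketch: grant a U-mode task read/write access to an extra buffer. */
static int32_t grant_buffer(tcb_t *task, void *buf, uint32_t len)
{
    if (!task || !task->mspace)
        return ERR_FAIL; /* error code name is an assumption */

    fpage_t *fp = mo_fpage_create((uint32_t) buf, len, PMPCFG_R | PMPCFG_W,
                                  PMP_PRIORITY_SHARED);
    if (!fp)
        return ERR_FAIL;

    /* Same list insertion pattern task_spawn_internal() uses for the stack. */
    fp->as_next = task->mspace->first;
    task->mspace->first = fp;

    /* Loaded lazily: the first access faults and pmp_handle_access_fault()
     * maps the flexpage into a free PMP slot (or evicts a victim). */
    return ERR_OK;
}
```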