diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fd97cf49077f5..7edcea77410f5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -606,6 +606,28 @@ config RISCV_ISA_ZACAS If you don't know what to do here, say Y. +config RISCV_ISA_V_UCOPY_THRESHOLD + int "Threshold size for vectorized user copies" + depends on RISCV_ISA_V + default 768 + help + Prefer using vectorized copy_to_user()/copy_from_user() when the + workload size exceeds this value. + +config RISCV_ISA_V_PREEMPTIVE + bool "Run kernel-mode Vector with kernel preemption" + depends on PREEMPTION + depends on RISCV_ISA_V + default y + help + Usually, in-kernel SIMD routines are run with preemption disabled. + Functions which envoke long running SIMD thus must yield core's + vector unit to prevent blocking other tasks for too long. + + This config allows kernel to run SIMD without explicitly disable + preemption. Enabling this config will result in higher memory + consumption due to the allocation of per-task's kernel Vector context. + config TOOLCHAIN_HAS_ZBB bool default y diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 36b955c762ba0..cd627ec289f16 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b); long long __ashrti3(long long a, int b); long long __ashlti3(long long a, int b); +#ifdef CONFIG_RISCV_ISA_V + +#ifdef CONFIG_MMU +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n); +#endif /* CONFIG_MMU */ + +void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2); +void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3); +void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4); +void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5); + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs); +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs); +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +#endif /* CONFIG_RISCV_ISA_V */ #define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs) diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 6e4dee49d84b9..b75b39a4fc100 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -4,6 +4,23 @@ #define _ASM_RISCV_ENTRY_COMMON_H #include +#include +#include + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) { + clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE); + /* + * We are already called with irq disabled, so go without + * keeping track of riscv_v_flags. + */ + riscv_v_vstate_restore(¤t->thread.vstate, regs); + } +} + +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 0de0e2e29a826..eb458e4cb93e6 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -78,6 +78,43 @@ struct task_struct; struct pt_regs; +/* + * We use a flag to track in-kernel Vector context. Currently the flag has the + * following meaning: + * + * - bit 0: indicates whether the in-kernel Vector context is active. The + * activation of this state disables the preemption. On a non-RT kernel, it + * also disable bh. + * - bits 8: is used for tracking preemptible kernel-mode Vector, when + * RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not + * disable the preemption if the thread's kernel_vstate.datap is allocated. + * Instead, the kernel set this bit field. Then the trap entry/exit code + * knows if we are entering/exiting the context that owns preempt_v. + * - 0: the task is not using preempt_v + * - 1: the task is actively using preempt_v. But whether does the task own + * the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK. + * - bit 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine + * when preempt_v starts: + * - 0: the task is actively using, and own preempt_v context. + * - non-zero: the task was using preempt_v, but then took a trap within. + * Thus, the task does not own preempt_v. Any use of Vector will have to + * save preempt_v, if dirty, and fallback to non-preemptible kernel-mode + * Vector. + * - bit 30: The in-kernel preempt_v context is saved, and requries to be + * restored when returning to the context that owns the preempt_v. + * - bit 31: The in-kernel preempt_v context is dirty, as signaled by the + * trap entry code. Any context switches out-of current task need to save + * it to the task's in-kernel V context. Also, any traps nesting on-top-of + * preempt_v requesting to use V needs a save. + */ +#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000 + +#define RISCV_V_CTX_UNIT_DEPTH 0x00010000 +#define RISCV_KERNEL_MODE_V 0x00000001 +#define RISCV_PREEMPT_V 0x00000100 +#define RISCV_PREEMPT_V_DIRTY 0x80000000 +#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000 + /* CPU-specific state of a task */ struct thread_struct { /* Callee-saved registers */ @@ -86,8 +123,10 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; - unsigned long vstate_ctrl; + u32 riscv_v_flags; + u32 vstate_ctrl; struct __riscv_v_ext_state vstate; + struct __riscv_v_ext_state kernel_vstate; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h new file mode 100644 index 0000000000000..54efbf523d49c --- /dev/null +++ b/arch/riscv/include/asm/simd.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 Linaro Ltd. + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_SIMD_H +#define __ASM_SIMD_H + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_RISCV_ISA_V +/* + * may_use_simd - whether it is allowable at this time to issue vector + * instructions or access the vector register file + * + * Callers must not assume that the result remains true beyond the next + * preempt_enable() or return from softirq context. + */ +static __must_check inline bool may_use_simd(void) +{ + /* + * RISCV_KERNEL_MODE_V is only set while preemption is disabled, + * and is clear whenever preemption is enabled. + */ + if (in_hardirq() || in_nmi()) + return false; + + /* + * Nesting is acheived in preempt_v by spreading the control for + * preemptible and non-preemptible kernel-mode Vector into two fields. + * Always try to match with prempt_v if kernel V-context exists. Then, + * fallback to check non preempt_v if nesting happens, or if the config + * is not set. + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) { + if (!riscv_preempt_v_started(current)) + return true; + } + /* + * Non-preemptible kernel-mode Vector temporarily disables bh. So we + * must not return true on irq_disabled(). Otherwise we would fail the + * lockdep check calling local_bh_enable() + */ + return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V); +} + +#else /* ! CONFIG_RISCV_ISA_V */ + +static __must_check inline bool may_use_simd(void) +{ + return false; +} + +#endif /* ! CONFIG_RISCV_ISA_V */ + +#endif diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7508f3ec80639..774a17b39c7d3 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev, struct pt_regs *regs; regs = task_pt_regs(prev); - if (unlikely(regs->status & SR_SD)) - fstate_save(prev, regs); + fstate_save(prev, regs); fstate_restore(next, task_pt_regs(next)); } diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 8c72d1bcdf141..fb10cdf02b97d 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -94,12 +94,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ #define TIF_32BIT 11 /* compat-mode 32bit process */ +#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index ad4d3d8209b11..9d3a130b78693 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -23,6 +23,18 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); bool riscv_v_first_use_handler(struct pt_regs *regs); +void kernel_vector_begin(void); +void kernel_vector_end(void); +void get_cpu_vector_context(void); +void put_cpu_vector_context(void); +void riscv_v_thread_free(struct task_struct *tsk); +void __init riscv_v_setup_ctx_cache(void); +void riscv_v_thread_alloc(struct task_struct *tsk); + +static inline u32 riscv_v_flags(void) +{ + return READ_ONCE(current->thread.riscv_v_flags); +} static __always_inline bool has_vector(void) { @@ -253,7 +265,7 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs) __riscv_v_vstate_dirty(regs); } -static inline void riscv_v_vstate_save(struct task_struct *task, +static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { unsigned long sr_vs, sr_vs_dirty; @@ -262,14 +274,13 @@ static inline void riscv_v_vstate_save(struct task_struct *task, ALT_SR_VS(sr_vs_dirty, SR_VS_DIRTY); if ((regs->status & sr_vs) == sr_vs_dirty) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; if (vstate->datap) __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } -static inline void riscv_v_vstate_restore(struct task_struct *task, +static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { unsigned long sr_vs; @@ -277,21 +288,77 @@ static inline void riscv_v_vstate_restore(struct task_struct *task, ALT_SR_VS(sr_vs, SR_VS); if ((regs->status & sr_vs) != SR_VS_OFF) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; if (vstate->datap) __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } +static inline void riscv_v_vstate_set_restore(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) != SR_VS_OFF) { + set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); + riscv_v_vstate_on(regs); + } +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static inline bool riscv_preempt_v_dirty(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY); +} + +static inline bool riscv_preempt_v_restore(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_preempt_v_clear_dirty(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_set_restore(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE; +} + +static inline bool riscv_preempt_v_started(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V); +} + +#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */ +static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; } +#define riscv_preempt_v_clear_dirty(tsk) do {} while (0) +#define riscv_preempt_v_set_restore(tsk) do {} while (0) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + static inline void __switch_to_vector(struct task_struct *prev, struct task_struct *next) { struct pt_regs *regs; - regs = task_pt_regs(prev); - riscv_v_vstate_save(prev, regs); - riscv_v_vstate_restore(next, task_pt_regs(next)); + if (riscv_preempt_v_started(prev)) { + if (riscv_preempt_v_dirty(prev)) { + __riscv_v_vstate_save(&prev->thread.kernel_vstate, + prev->thread.kernel_vstate.datap); + riscv_preempt_v_clear_dirty(prev); + } + } else { + regs = task_pt_regs(prev); + riscv_v_vstate_save(&prev->thread.vstate, regs); + } + + if (riscv_preempt_v_started(next)) + riscv_preempt_v_set_restore(next); + else + riscv_v_vstate_set_restore(next, task_pt_regs(next)); } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); @@ -309,11 +376,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_discard(regs) do {} while (0) -#define riscv_v_vstate_save(task, regs) do {} while (0) -#define riscv_v_vstate_restore(task, regs) do {} while (0) +#define riscv_v_vstate_save(vstate, regs) do {} while (0) +#define riscv_v_vstate_restore(vstate, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) #define riscv_v_vstate_off(regs) do {} while (0) #define riscv_v_vstate_on(regs) do {} while (0) +#define riscv_v_thread_free(tsk) do {} while (0) +#define riscv_v_setup_ctx_cache() do {} while (0) +#define riscv_v_thread_alloc(tsk) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 0000000000000..96011861e46b4 --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ + +#include +#include +#ifdef CONFIG_RISCV_ISA_V +#include +#include +#include + +static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2) +{ + kernel_vector_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_vector_end(); +} + +static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3) +{ + kernel_vector_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_vector_end(); +} + +static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4) +{ + kernel_vector_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_vector_end(); +} + +static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5) +{ + kernel_vector_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_vector_end(); +} + +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_vector_2, + .do_3 = xor_vector_3, + .do_4 = xor_vector_4, + .do_5 = xor_vector_5 +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector()) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index e93c358e5c798..343b8e570607c 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index cfdecfbaad627..2b3d9398c113f 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -9,7 +9,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -ENTRY(__riscv_copy_words_unaligned) +SYM_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned) 2: ret -END(__riscv_copy_words_unaligned) +SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. */ /* Note: The size is truncated to a multiple of 8 */ -ENTRY(__riscv_copy_bytes_unaligned) +SYM_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 @@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned) 2: ret -END(__riscv_copy_bytes_unaligned) +SYM_FUNC_END(__riscv_copy_bytes_unaligned) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 1f90fee24a8ba..6d239ba461ad1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -25,9 +25,9 @@ SYM_CODE_START(handle_exception) * register will contain 0, and we should continue on the current TP. */ csrrw tp, CSR_SCRATCH, tp - bnez tp, _save_context + bnez tp, .Lsave_context -_restore_kernel_tpsp: +.Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH REG_S sp, TASK_TI_KERNEL_SP(tp) @@ -39,7 +39,7 @@ _restore_kernel_tpsp: REG_L sp, TASK_TI_KERNEL_SP(tp) #endif -_save_context: +.Lsave_context: REG_S sp, TASK_TI_USER_SP(tp) REG_L sp, TASK_TI_KERNEL_SP(tp) addi sp, sp, -(PT_SIZE_ON_STACK) @@ -81,6 +81,11 @@ _save_context: .option norelax la gp, __global_pointer$ .option pop + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_start +#endif move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -134,6 +139,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) */ csrw CSR_SCRATCH, tp 1: +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_end +#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's @@ -279,7 +288,7 @@ SYM_FUNC_END(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -SYM_CODE_START(excp_vect_table) +SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -297,12 +306,11 @@ SYM_CODE_START(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -excp_vect_table_end: -SYM_CODE_END(excp_vect_table) +SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU -SYM_CODE_START(__user_rt_sigreturn) +SYM_DATA_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn ecall -SYM_CODE_END(__user_rt_sigreturn) +SYM_DATA_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index dd2205473de78..6201afcd6b45f 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -19,7 +19,7 @@ #include #include -ENTRY(__fstate_save) +SYM_FUNC_START(__fstate_save) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -60,9 +60,9 @@ ENTRY(__fstate_save) sw t0, TASK_THREAD_FCSR_F0(a0) csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_save) +SYM_FUNC_END(__fstate_save) -ENTRY(__fstate_restore) +SYM_FUNC_START(__fstate_restore) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -103,4 +103,4 @@ ENTRY(__fstate_restore) fscsr t0 csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_restore) +SYM_FUNC_END(__fstate_restore) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 9691fa8f2faa7..8eec82355f92f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -18,7 +18,7 @@ #include "efi-header.S" __HEAD -ENTRY(_start) +SYM_CODE_START(_start) /* * Image header expected by Linux boot-loaders. The image header data * structure is described in asm/image.h. @@ -169,12 +169,12 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a0 call relocate_enable_mmu #endif - call setup_trap_vector + call .Lsetup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ .align 2 -setup_trap_vector: +.Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception csrw CSR_TVEC, a0 @@ -192,9 +192,9 @@ setup_trap_vector: wfi j .Lsecondary_park -END(_start) +SYM_CODE_END(_start) -ENTRY(_start_kernel) +SYM_CODE_START(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -211,7 +211,7 @@ ENTRY(_start_kernel) * not implement PMPs, so we set up a quick trap handler to just skip * touching the PMPs on any trap. */ - la a0, pmp_done + la a0, .Lpmp_done csrw CSR_TVEC, a0 li a0, -1 @@ -219,7 +219,7 @@ ENTRY(_start_kernel) li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) csrw CSR_PMPCFG0, a0 .align 2 -pmp_done: +.Lpmp_done: /* * The hartid in a0 is expected later on, and we have no firmware @@ -283,12 +283,12 @@ pmp_done: /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop - ble a4, a3, clear_bss_done -clear_bss: + ble a4, a3, .Lclear_bss_done +.Lclear_bss: REG_S zero, (a3) add a3, a3, RISCV_SZPTR - blt a3, a4, clear_bss -clear_bss_done: + blt a3, a4, .Lclear_bss +.Lclear_bss_done: #endif la a2, boot_cpu_hartid XIP_FIXUP_OFFSET a2 @@ -315,7 +315,7 @@ clear_bss_done: call relocate_enable_mmu #endif /* CONFIG_MMU */ - call setup_trap_vector + call .Lsetup_trap_vector /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE @@ -357,10 +357,10 @@ clear_bss_done: tail .Lsecondary_start_common #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ -END(_start_kernel) +SYM_CODE_END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE -ENTRY(reset_regs) +SYM_CODE_START_LOCAL(reset_regs) li sp, 0 li gp, 0 li tp, 0 @@ -458,5 +458,5 @@ ENTRY(reset_regs) .Lreset_regs_done_vector: #endif /* CONFIG_RISCV_ISA_V */ ret -END(reset_regs) +SYM_CODE_END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index d698dd7df637b..d040dcf4add45 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -21,7 +21,7 @@ * * Always returns 0 */ -ENTRY(__hibernate_cpu_resume) +SYM_FUNC_START(__hibernate_cpu_resume) /* switch to hibernated image's page table. */ csrw CSR_SATP, s0 sfence.vma @@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume) mv a0, zero ret -END(__hibernate_cpu_resume) +SYM_FUNC_END(__hibernate_cpu_resume) /* * Prepare to restore the image. @@ -42,7 +42,7 @@ END(__hibernate_cpu_resume) * a1: satp of temporary page tables. * a2: cpu_resume. */ -ENTRY(hibernate_restore_image) +SYM_FUNC_START(hibernate_restore_image) mv s0, a0 mv s1, a1 mv s2, a2 @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L a1, relocated_restore_code jr a1 -END(hibernate_restore_image) +SYM_FUNC_END(hibernate_restore_image) /* * The below code will be executed from a 'safe' page. @@ -58,7 +58,7 @@ END(hibernate_restore_image) * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() * to restore the CPU context. */ -ENTRY(hibernate_core_restore_code) +SYM_FUNC_START(hibernate_core_restore_code) /* switch to temp page table. */ csrw satp, s1 sfence.vma @@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code) bnez s4, .Lcopy jr s2 -END(hibernate_core_restore_code) +SYM_FUNC_END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c new file mode 100644 index 0000000000000..6afe80c7f03ab --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas + * Copyright (C) 2017 Linaro Ltd. + * Copyright (C) 2021 SiFive + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +#include +#endif + +static inline void riscv_v_flags_set(u32 flags) +{ + WRITE_ONCE(current->thread.riscv_v_flags, flags); +} + +static inline void riscv_v_start(u32 flags) +{ + int orig; + + orig = riscv_v_flags(); + BUG_ON((orig & flags) != 0); + riscv_v_flags_set(orig | flags); + barrier(); +} + +static inline void riscv_v_stop(u32 flags) +{ + int orig; + + barrier(); + orig = riscv_v_flags(); + BUG_ON((orig & flags) == 0); + riscv_v_flags_set(orig & ~flags); +} + +/* + * Claim ownership of the CPU vector context for use by the calling context. + * + * The caller may freely manipulate the vector context metadata until + * put_cpu_vector_context() is called. + */ +void get_cpu_vector_context(void) +{ + /* + * disable softirqs so it is impossible for softirqs to nest + * get_cpu_vector_context() when kernel is actively using Vector. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); + + riscv_v_start(RISCV_KERNEL_MODE_V); +} + +/* + * Release the CPU vector context. + * + * Must be called from a context in which get_cpu_vector_context() was + * previously called, with no call to put_cpu_vector_context() in the + * meantime. + */ +void put_cpu_vector_context(void) +{ + riscv_v_stop(RISCV_KERNEL_MODE_V); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static __always_inline u32 *riscv_v_flags_ptr(void) +{ + return ¤t->thread.riscv_v_flags; +} + +static inline void riscv_preempt_v_set_dirty(void) +{ + *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_reset_flags(void) +{ + *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_v_ctx_depth_inc(void) +{ + *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH; +} + +static inline void riscv_v_ctx_depth_dec(void) +{ + *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH; +} + +static inline u32 riscv_v_ctx_get_depth(void) +{ + return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK; +} + +static int riscv_v_stop_kernel_context(void) +{ + if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current)) + return 1; + + riscv_preempt_v_clear_dirty(current); + riscv_v_stop(RISCV_PREEMPT_V); + return 0; +} + +static int riscv_v_start_kernel_context(bool *is_nested) +{ + struct __riscv_v_ext_state *kvstate, *uvstate; + + kvstate = ¤t->thread.kernel_vstate; + if (!kvstate->datap) + return -ENOENT; + + if (riscv_preempt_v_started(current)) { + WARN_ON(riscv_v_ctx_get_depth() == 0); + *is_nested = true; + get_cpu_vector_context(); + if (riscv_preempt_v_dirty(current)) { + __riscv_v_vstate_save(kvstate, kvstate->datap); + riscv_preempt_v_clear_dirty(current); + } + riscv_preempt_v_set_restore(current); + return 0; + } + + /* Transfer the ownership of V from user to kernel, then save */ + riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); + if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + uvstate = ¤t->thread.vstate; + __riscv_v_vstate_save(uvstate, uvstate->datap); + } + riscv_preempt_v_clear_dirty(current); + return 0; +} + +/* low-level V context handling code, called with irq disabled */ +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) +{ + int depth; + + if (!riscv_preempt_v_started(current)) + return; + + depth = riscv_v_ctx_get_depth(); + if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + riscv_preempt_v_set_dirty(); + + riscv_v_ctx_depth_inc(); +} + +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs) +{ + struct __riscv_v_ext_state *vstate = ¤t->thread.kernel_vstate; + u32 depth; + + WARN_ON(!irqs_disabled()); + + if (!riscv_preempt_v_started(current)) + return; + + riscv_v_ctx_depth_dec(); + depth = riscv_v_ctx_get_depth(); + if (depth == 0) { + if (riscv_preempt_v_restore(current)) { + __riscv_v_vstate_restore(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + riscv_preempt_v_reset_flags(); + } + } +} +#else +#define riscv_v_start_kernel_context(nested) (-ENOENT) +#define riscv_v_stop_kernel_context() (-ENOENT) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +/* + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the vector registers is saved back to memory as necessary. + * + * A matching call to kernel_vector_end() must be made before returning from the + * calling context. + * + * The caller may freely use the vector registers until kernel_vector_end() is + * called. + */ +void kernel_vector_begin(void) +{ + bool nested = false; + + if (WARN_ON(!has_vector())) + return; + + BUG_ON(!may_use_simd()); + + if (riscv_v_start_kernel_context(&nested)) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + } + + if (!nested) + riscv_v_vstate_set_restore(current, task_pt_regs(current)); + + riscv_v_enable(); +} +EXPORT_SYMBOL_GPL(kernel_vector_begin); + +/* + * kernel_vector_end(): give the CPU vector registers back to the current task + * + * Must be called from a context in which kernel_vector_begin() was previously + * called, with no call to kernel_vector_end() in the meantime. + * + * The caller must not use the vector registers after this function is called, + * unless kernel_vector_begin() is called again in the meantime. + */ +void kernel_vector_end(void) +{ + if (WARN_ON(!has_vector())) + return; + + riscv_v_disable(); + + if (riscv_v_stop_kernel_context()) + put_cpu_vector_context(); +} +EXPORT_SYMBOL_GPL(kernel_vector_end); diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 669b8697aa38a..58dd96a2a1534 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,7 +82,7 @@ .endm #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) SAVE_ABI addi a0, t0, -FENTRY_RA_OFFSET @@ -91,8 +91,7 @@ ENTRY(ftrace_caller) mv a1, ra mv a3, sp -ftrace_call: - .global ftrace_call +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,16 +101,15 @@ ftrace_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_call: - .global ftrace_graph_call +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI jr t0 -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) SAVE_ALL addi a0, t0, -FENTRY_RA_OFFSET @@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller) mv a1, ra mv a3, sp -ftrace_regs_call: - .global ftrace_regs_call +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,12 +128,11 @@ ftrace_regs_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call +SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ALL jr t0 -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8818a8fa9ff3a..b4dd9ed6849e3 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) /* * On implementing the frame point test, the ideal way is to compare the * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return. @@ -76,25 +76,25 @@ ENTRY(return_to_handler) mv a2, a0 RESTORE_RET_ABI_STATE jalr a2 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(MCOUNT_NAME) +SYM_FUNC_START(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return REG_L t1, 0(t0) - bne t1, t4, do_ftrace_graph_caller + bne t1, t4, .Ldo_ftrace_graph_caller la t3, ftrace_graph_entry REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub - bne t2, t6, do_ftrace_graph_caller + bne t2, t6, .Ldo_ftrace_graph_caller #endif la t3, ftrace_trace_function REG_L t5, 0(t3) - bne t5, t4, do_trace + bne t5, t4, .Ldo_trace ret #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME) * A pseudo representation for the function graph tracer: * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ -do_ftrace_graph_caller: +.Ldo_ftrace_graph_caller: addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -118,7 +118,7 @@ do_ftrace_graph_caller: * A pseudo representation for the function tracer: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ -do_trace: +.Ldo_trace: REG_L a1, -SZREG(s0) mv a0, ra @@ -126,6 +126,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(MCOUNT_NAME) +SYM_FUNC_END(MCOUNT_NAME) #endif EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S index 21bac92a170a9..f2cd83d9b0f00 100644 --- a/arch/riscv/kernel/probes/rethook_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(arch_rethook_trampoline) +SYM_CODE_START(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(arch_rethook_trampoline) +SYM_CODE_END(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index dd973216e31cf..d90413ededd2a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -151,6 +151,7 @@ void flush_thread(void) riscv_v_vstate_off(task_pt_regs(current)); kfree(current->thread.vstate.datap); memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif } @@ -158,7 +159,7 @@ void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ if (has_vector()) - kfree(tsk->thread.vstate.datap); + riscv_v_thread_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -167,6 +168,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* clear entire V context, including datap for a new task */ memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE); return 0; } @@ -200,10 +203,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->a0 = 0; /* Return value of fork() */ p->thread.s[0] = 0; } + p->thread.riscv_v_flags = 0; + if (has_vector()) + riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } +void __init arch_task_cache_init(void) +{ + riscv_v_setup_ctx_cache(); +} + EXPORT_SYMBOL_GPL(__fstate_save); EXPORT_SYMBOL_GPL(__fstate_restore); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2afe460de16a6..e8515aa9d80bf 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target, * Ensure the vector registers have been saved to the memory before * copying them to membuf. */ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); + if (target == current) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + put_cpu_vector_context(); + } ptrace_vstate.vstart = vstate->vstart; ptrace_vstate.vl = vstate->vl; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 048b9b23d7543..0e3bdd520617e 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) /* datap is designed to be 16 byte aligned for better performance */ WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); - riscv_v_vstate_save(current, regs); + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, regs); + put_cpu_vector_context(); + /* Copy everything of vstate but datap. */ err = __copy_to_user(&state->v_state, ¤t->thread.vstate, offsetof(struct __riscv_v_ext_state, datap)); @@ -116,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -130,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index f7960c7c5f9e2..a59c4c9036963 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -16,7 +16,7 @@ .altmacro .option norelax -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) /* Save registers (except A0 and T0-T6) */ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) @@ -57,7 +57,7 @@ ENTRY(__cpu_suspend_enter) /* Return to C code */ ret -END(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) SYM_TYPED_FUNC_START(__cpu_resume_enter) /* Load the global pointer */ diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 82f97d67c23e9..8f884227e8bca 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -8,7 +8,7 @@ .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -ENTRY(__vdso_flush_icache) +SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc #ifdef CONFIG_SMP li a7, __NR_riscv_flush_icache @@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache) #endif ret .cfi_endproc -ENDPROC(__vdso_flush_icache) +SYM_FUNC_END(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S index bb0c05e2ffbae..9c1bd531907f2 100644 --- a/arch/riscv/kernel/vdso/getcpu.S +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -8,11 +8,11 @@ .text /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ -ENTRY(__vdso_getcpu) +SYM_FUNC_START(__vdso_getcpu) .cfi_startproc /* For now, just do the syscall. */ li a7, __NR_getcpu ecall ret .cfi_endproc -ENDPROC(__vdso_getcpu) +SYM_FUNC_END(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 10438c7c626ac..3dc022aa8931a 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -7,10 +7,10 @@ #include .text -ENTRY(__vdso_rt_sigreturn) +SYM_FUNC_START(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn ecall .cfi_endproc -ENDPROC(__vdso_rt_sigreturn) +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S index 4e704146c77a0..77e57f8305216 100644 --- a/arch/riscv/kernel/vdso/sys_hwprobe.S +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -5,11 +5,11 @@ #include .text -ENTRY(riscv_hwprobe) +SYM_FUNC_START(riscv_hwprobe) .cfi_startproc li a7, __NR_riscv_hwprobe ecall ret .cfi_endproc -ENDPROC(riscv_hwprobe) +SYM_FUNC_END(riscv_hwprobe) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index b4e357363d29a..e52a94f40d692 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -22,6 +22,10 @@ #include static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); +static struct kmem_cache *riscv_v_user_cachep; +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static struct kmem_cache *riscv_v_kernel_cachep; +#endif unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -55,6 +59,21 @@ int riscv_v_setup_vsize(void) return 0; } +void __init riscv_v_setup_ctx_cache(void) +{ + if (!has_vector()) + return; + + riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", + riscv_v_vsize, 16, SLAB_PANIC, + 0, riscv_v_vsize, NULL); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx", + riscv_v_vsize, 16, + SLAB_PANIC, NULL); +#endif +} + static bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; @@ -88,21 +107,38 @@ static bool insn_is_vector(u32 insn_buf) return false; } -static int riscv_v_thread_zalloc(void) +static int riscv_v_thread_zalloc(struct kmem_cache *cache, + struct __riscv_v_ext_state *ctx) { void *datap; if (!riscv_v_vsize) return -EINVAL; - datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + datap = kmem_cache_zalloc(cache, GFP_KERNEL); if (!datap) return -ENOMEM; - current->thread.vstate.datap = datap; - memset(¤t->thread.vstate, 0, offsetof(struct __riscv_v_ext_state, - datap)); + ctx->datap = datap; + memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap)); return 0; } +void riscv_v_thread_alloc(struct task_struct *tsk) +{ +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate); +#endif +} + +void riscv_v_thread_free(struct task_struct *tsk) +{ + if (tsk->thread.vstate.datap) + kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + if (tsk->thread.kernel_vstate.datap) + kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap); +#endif +} + #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2) #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) @@ -131,7 +167,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt, ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt); if (inherit) ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT; - tsk->thread.vstate_ctrl = ctrl; + tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK; + tsk->thread.vstate_ctrl |= ctrl; } bool riscv_v_vstate_ctrl_user_allowed(void) @@ -174,12 +211,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) * context where VS has been off. So, try to allocate the user's V * context and resume execution. */ - if (riscv_v_thread_zalloc()) { + if (riscv_v_thread_zalloc(riscv_v_user_cachep, ¤t->thread.vstate)) { force_sig(SIGBUS); return true; } riscv_v_vstate_on(regs); - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return true; } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index df5568beb612f..50be5edf0d78e 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -11,8 +11,13 @@ lib-y += memmove.o lib-y += strcmp.o lib-y += strlen.o lib-y += strncmp.o -lib-$(CONFIG_MMU) += uaccess.o +ifeq ($(CONFIG_MMU), y) +lib-y += uaccess.o +lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o +endif lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +lib-$(CONFIG_RISCV_ISA_V) += xor.o +lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 1a40d01a95439..44e009ec5fef6 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,8 +7,7 @@ #include /* void *memcpy(void *, const void *, size_t) */ -ENTRY(__memcpy) -WEAK(memcpy) +SYM_FUNC_START(__memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -105,6 +104,7 @@ WEAK(memcpy) bltu a1, a3, 5b 6: ret -END(__memcpy) +SYM_FUNC_END(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 838ff2022fe32..cb3e2e7ef0baa 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -7,7 +7,6 @@ #include SYM_FUNC_START(__memmove) -SYM_FUNC_START_WEAK(memmove) /* * Returns * a0 - dest @@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove) */ /* Return if nothing to do */ - beq a0, a1, return_from_memmove - beqz a2, return_from_memmove + beq a0, a1, .Lreturn_from_memmove + beqz a2, .Lreturn_from_memmove /* * Register Uses @@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove) * small enough not to bother. */ andi t0, a2, -(2 * SZREG) - beqz t0, byte_copy + beqz t0, .Lbyte_copy /* * Now solve for t5 and t6. @@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove) */ xor t0, a0, a1 andi t1, t0, (SZREG - 1) - beqz t1, coaligned_copy + beqz t1, .Lcoaligned_copy /* Fall through to misaligned fixup copy */ -misaligned_fixup_copy: - bltu a1, a0, misaligned_fixup_copy_reverse +.Lmisaligned_fixup_copy: + bltu a1, a0, .Lmisaligned_fixup_copy_reverse -misaligned_fixup_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lmisaligned_fixup_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -153,10 +152,10 @@ misaligned_fixup_copy_forward: mv t3, t6 /* Fix the dest pointer in case the loop was broken */ add a1, t3, a5 /* Restore the src pointer */ - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -misaligned_fixup_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lmisaligned_fixup_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse: mv t4, t5 /* Fix the dest pointer in case the loop was broken */ add a4, t4, a5 /* Restore the src pointer */ - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * Simple copy loops for SZREG co-aligned memory locations. * These also make calls to do byte copies for any unaligned * data at their terminations. */ -coaligned_copy: - bltu a1, a0, coaligned_copy_reverse +.Lcoaligned_copy: + bltu a1, a0, .Lcoaligned_copy_reverse -coaligned_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lcoaligned_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward 1: REG_L t1, ( 0 * SZREG)(a1) @@ -235,10 +234,10 @@ coaligned_copy_forward: REG_S t1, (-1 * SZREG)(t3) bne t3, t6, 1b - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -coaligned_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lcoaligned_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse 1: REG_L t1, (-1 * SZREG)(a4) @@ -247,7 +246,7 @@ coaligned_copy_reverse: REG_S t1, ( 0 * SZREG)(t4) bne t4, t5, 1b - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * These are basically sub-functions within the function. They @@ -258,7 +257,7 @@ coaligned_copy_reverse: * up from where they were left and we avoid code duplication * without any overhead except the call in and return jumps. */ -byte_copy_until_aligned_forward: +.Lbyte_copy_until_aligned_forward: beq t3, t5, 2f 1: lb t1, 0(a1) @@ -269,7 +268,7 @@ byte_copy_until_aligned_forward: 2: jalr zero, 0x0(t0) /* Return to multibyte copy loop */ -byte_copy_until_aligned_reverse: +.Lbyte_copy_until_aligned_reverse: beq t4, t6, 2f 1: lb t1, -1(a4) @@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse: * These will byte copy until they reach the end of data to copy. * At that point, they will call to return from memmove. */ -byte_copy: - bltu a1, a0, byte_copy_reverse +.Lbyte_copy: + bltu a1, a0, .Lbyte_copy_reverse -byte_copy_forward: +.Lbyte_copy_forward: beq t3, t4, 2f 1: lb t1, 0(a1) @@ -299,7 +298,7 @@ byte_copy_forward: 2: ret -byte_copy_reverse: +.Lbyte_copy_reverse: beq t4, t3, 2f 1: lb t1, -1(a4) @@ -309,10 +308,10 @@ byte_copy_reverse: bne t4, t3, 1b 2: -return_from_memmove: +.Lreturn_from_memmove: ret -SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) SYM_FUNC_ALIAS(__pi_memmove, __memmove) SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 34c5360c6705c..35f358e70bdb6 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,8 +8,7 @@ #include /* void *memset(void *, int, size_t) */ -ENTRY(__memset) -WEAK(memset) +SYM_FUNC_START(__memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -110,4 +109,5 @@ WEAK(memset) bltu t0, a3, 5b 6: ret -END(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c new file mode 100644 index 0000000000000..be38a93cedaec --- /dev/null +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SiFive + * Author: Andy Chiu + */ +#include +#include + +#include +#include + +#ifdef CONFIG_MMU +#include +#endif + +#ifdef CONFIG_MMU +size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; +int __asm_vector_usercopy(void *dst, void *src, size_t n); +int fallback_scalar_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +{ + size_t remain, copied; + + /* skip has_vector() check because it has been done by the asm */ + if (!may_use_simd()) + goto fallback; + + kernel_vector_begin(); + remain = __asm_vector_usercopy(dst, src, n); + kernel_vector_end(); + + if (remain) { + copied = n - remain; + dst += copied; + src += copied; + n = remain; + goto fallback; + } + + return remain; + +fallback: + return fallback_scalar_usercopy(dst, src, n); +} +#endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 09b47ebacf2e8..a1e4a3c429254 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,6 +3,8 @@ #include #include #include +#include +#include .macro fixup op reg addr lbl 100: @@ -10,8 +12,14 @@ _asm_extable 100b, \lbl .endm -ENTRY(__asm_copy_to_user) -ENTRY(__asm_copy_from_user) +SYM_FUNC_START(__asm_copy_to_user) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy + tail enter_vector_usercopy +#endif +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ li t6, SR_SUM @@ -181,13 +189,14 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 sub a0, t5, a0 ret -ENDPROC(__asm_copy_to_user) -ENDPROC(__asm_copy_from_user) +SYM_FUNC_END(__asm_copy_to_user) +SYM_FUNC_END(fallback_scalar_usercopy) EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) -ENTRY(__clear_user) +SYM_FUNC_START(__clear_user) /* Enable access to user memory */ li t6, SR_SUM @@ -233,5 +242,5 @@ ENTRY(__clear_user) csrc CSR_STATUS, t6 sub a0, a3, a0 ret -ENDPROC(__clear_user) +SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S new file mode 100644 index 0000000000000..51ab5588e9ff3 --- /dev/null +++ b/arch/riscv/lib/uaccess_vector.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + + .macro fixup op reg addr lbl +100: + \op \reg, \addr + _asm_extable 100b, \lbl + .endm + +SYM_FUNC_START(__asm_vector_usercopy) + /* Enable access to user memory */ + li t6, SR_SUM + csrs CSR_STATUS, t6 + +loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + fixup vle8.v vData, (pSrc), 10f + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + fixup vse8.v vData, (pDst), 11f + add pDst, pDst, iVL + bnez iNum, loop + + /* Exception fixup for vector load is shared with normal exit */ +10: + /* Disable access to user memory */ + csrc CSR_STATUS, t6 + mv a0, iNum + ret + + /* Exception fixup code for vector store. */ +11: + /* Undo the subtraction after vle8.v */ + add iNum, iNum, iVL + /* Make sure the scalar fallback skip already processed bytes */ + csrr t2, CSR_VSTART + sub iNum, iNum, t2 + j 10b +SYM_FUNC_END(__asm_vector_usercopy) diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 0000000000000..b28f2430e52fa --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ +#include +#include +#include + +SYM_FUNC_START(xor_regs_2_) + vsetvli a3, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +SYM_FUNC_END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +SYM_FUNC_START(xor_regs_3_) + vsetvli a4, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +SYM_FUNC_END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +SYM_FUNC_START(xor_regs_4_) + vsetvli a5, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +SYM_FUNC_END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +SYM_FUNC_START(xor_regs_5_) + vsetvli a6, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +SYM_FUNC_END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index a4ede42bc1510..96df126df649c 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -8,17 +8,12 @@ * */ -.macro size, sym:req - .size \sym, . - \sym -.endm #include #include .text -.globl purgatory_start -purgatory_start: - +SYM_CODE_START(purgatory_start) lla sp, .Lstack mv s0, a0 /* The hartid of the current hart */ mv s1, a1 /* Phys address of the FDT image */ @@ -30,8 +25,7 @@ purgatory_start: mv a1, s1 ld a2, riscv_kernel_entry jr a2 - -size purgatory_start +SYM_CODE_END(purgatory_start) .align 4 .rept 256 @@ -42,9 +36,6 @@ size purgatory_start .data .align LGREG -.globl riscv_kernel_entry -riscv_kernel_entry: - .quad 0 -size riscv_kernel_entry +SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 775a3cbaff4ed..97b2122032b23 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -332,12 +332,10 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, return ret; } -static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) +static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev) { struct qcom_iommu_domain *qcom_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; /* * Allocate the domain and initialise some of its data structures. * We can't really do anything meaningful until we've added a @@ -400,6 +398,44 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev return 0; } +static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct qcom_iommu_domain *qcom_domain; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); + unsigned int i; + + if (domain == identity_domain || !domain) + return 0; + + qcom_domain = to_qcom_iommu_domain(domain); + if (WARN_ON(!qcom_domain->iommu)) + return -EINVAL; + + pm_runtime_get_sync(qcom_iommu->dev); + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]); + + /* Disable the context bank: */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + ctx->domain = NULL; + } + pm_runtime_put_sync(qcom_iommu->dev); + return 0; +} + +static struct iommu_domain_ops qcom_iommu_identity_ops = { + .attach_dev = qcom_iommu_identity_attach, +}; + +static struct iommu_domain qcom_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &qcom_iommu_identity_ops, +}; + static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -565,8 +601,9 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) } static const struct iommu_ops qcom_iommu_ops = { + .identity_domain = &qcom_iommu_identity_domain, .capable = qcom_iommu_capable, - .domain_alloc = qcom_iommu_domain_alloc, + .domain_alloc_paging = qcom_iommu_domain_alloc_paging, .probe_device = qcom_iommu_probe_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c275fe71c4db3..d6dead2ed10c1 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -24,6 +24,7 @@ typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; +static struct iommu_domain exynos_identity_domain; /* We do not consider super section mapping (16MB) */ #define SECT_ORDER 20 @@ -829,7 +830,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -847,7 +848,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (data->domain) { + if (&data->domain->domain != &exynos_identity_domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } @@ -886,7 +887,7 @@ static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val) DMA_TO_DEVICE); } -static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) +static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) { struct exynos_iommu_domain *domain; dma_addr_t handle; @@ -895,9 +896,6 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) /* Check if correct PTE offsets are initialized */ BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev); - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -980,17 +978,20 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain) kfree(domain); } -static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, - struct device *dev) +static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - phys_addr_t pagetable = virt_to_phys(domain->pgtable); + struct exynos_iommu_domain *domain; + phys_addr_t pagetable; struct sysmmu_drvdata *data, *next; unsigned long flags; - if (!has_sysmmu(dev) || owner->domain != iommu_domain) - return; + if (owner->domain == identity_domain) + return 0; + + domain = to_exynos_domain(owner->domain); + pagetable = virt_to_phys(domain->pgtable); mutex_lock(&owner->rpm_lock); @@ -1009,15 +1010,25 @@ static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain, list_del_init(&data->domain_node); spin_unlock(&data->lock); } - owner->domain = NULL; + owner->domain = identity_domain; spin_unlock_irqrestore(&domain->lock, flags); mutex_unlock(&owner->rpm_lock); - dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", __func__, - &pagetable); + dev_dbg(dev, "%s: Restored IOMMU to IDENTITY from pgtable %pa\n", + __func__, &pagetable); + return 0; } +static struct iommu_domain_ops exynos_identity_ops = { + .attach_dev = exynos_iommu_identity_attach, +}; + +static struct iommu_domain exynos_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &exynos_identity_ops, +}; + static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct device *dev) { @@ -1026,12 +1037,11 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain, struct sysmmu_drvdata *data; phys_addr_t pagetable = virt_to_phys(domain->pgtable); unsigned long flags; + int err; - if (!has_sysmmu(dev)) - return -ENODEV; - - if (owner->domain) - exynos_iommu_detach_device(owner->domain, dev); + err = exynos_iommu_identity_attach(&exynos_identity_domain, dev); + if (err) + return err; mutex_lock(&owner->rpm_lock); @@ -1407,26 +1417,12 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev) return &data->iommu; } -static void exynos_iommu_set_platform_dma(struct device *dev) -{ - struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - - if (owner->domain) { - struct iommu_group *group = iommu_group_get(dev); - - if (group) { - exynos_iommu_detach_device(owner->domain, dev); - iommu_group_put(group); - } - } -} - static void exynos_iommu_release_device(struct device *dev) { struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); struct sysmmu_drvdata *data; - exynos_iommu_set_platform_dma(dev); + WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev)); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1457,6 +1453,7 @@ static int exynos_iommu_of_xlate(struct device *dev, INIT_LIST_HEAD(&owner->controllers); mutex_init(&owner->rpm_lock); + owner->domain = &exynos_identity_domain; dev_iommu_priv_set(dev, owner); } @@ -1471,11 +1468,9 @@ static int exynos_iommu_of_xlate(struct device *dev, } static const struct iommu_ops exynos_iommu_ops = { - .domain_alloc = exynos_iommu_domain_alloc, + .identity_domain = &exynos_identity_domain, + .domain_alloc_paging = exynos_iommu_domain_alloc_paging, .device_group = generic_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = exynos_iommu_set_platform_dma, -#endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 4ac0e247ec2b5..e9d2bff4659b7 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -196,6 +196,13 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type) { struct fsl_dma_domain *dma_domain; + /* + * FIXME: This isn't creating an unmanaged domain since the + * default_domain_ops do not have any map/unmap function it doesn't meet + * the requirements for __IOMMU_DOMAIN_PAGING. The only purpose seems to + * allow drivers/soc/fsl/qbman/qman_portal.c to do + * fsl_pamu_configure_l1_stash() + */ if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; @@ -283,15 +290,33 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, return ret; } -static void fsl_pamu_set_platform_dma(struct device *dev) +/* + * FIXME: fsl/pamu is completely broken in terms of how it works with the iommu + * API. Immediately after probe the HW is left in an IDENTITY translation and + * the driver provides a non-working UNMANAGED domain that it can switch over + * to. However it cannot switch back to an IDENTITY translation, instead it + * switches to what looks like BLOCKING. + */ +static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + struct fsl_dma_domain *dma_domain; const u32 *prop; int len; struct pci_dev *pdev = NULL; struct pci_controller *pci_ctl; + /* + * Hack to keep things working as they always have, only leaving an + * UNMANAGED domain makes it BLOCKING. + */ + if (domain == platform_domain || !domain || + domain->type != IOMMU_DOMAIN_UNMANAGED) + return 0; + + dma_domain = to_fsl_dma_domain(domain); + /* * Use LIODN of the PCI controller while detaching a * PCI device. @@ -312,8 +337,18 @@ static void fsl_pamu_set_platform_dma(struct device *dev) detach_device(dev, dma_domain); else pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); + return 0; } +static struct iommu_domain_ops fsl_pamu_platform_ops = { + .attach_dev = fsl_pamu_platform_attach, +}; + +static struct iommu_domain fsl_pamu_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &fsl_pamu_platform_ops, +}; + /* Set the domain stash attribute */ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu) { @@ -395,11 +430,11 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev) } static const struct iommu_ops fsl_pamu_ops = { + .default_domain = &fsl_pamu_platform_domain, .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, - .set_platform_dma_ops = fsl_pamu_set_platform_dma, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, .iova_to_phys = fsl_pamu_iova_to_phys, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3fa5699b9ff19..a7f2bbf665ce5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -96,8 +96,8 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); static void iommu_release_device(struct device *dev); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type); +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type); static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev); static int __iommu_attach_group(struct iommu_domain *domain, @@ -184,6 +184,8 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_DMA_FQ: return "Translated"; + case IOMMU_DOMAIN_PLATFORM: + return "Platform"; default: return "Unknown"; } @@ -566,18 +568,6 @@ int iommu_probe_device(struct device *dev) mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, NULL); mutex_unlock(&iommu_probe_device_lock); - - /* - * The dma_configure replay paths need bus_iommu_probe() to - * finish before they can call arch_setup_dma_ops() - */ - if (IS_ENABLED(CONFIG_IOMMU_DMA) && !ret && dev->iommu_group) { - mutex_lock(&dev->iommu_group->mutex); - if (!dev->iommu_group->default_domain && - !dev_iommu_ops(dev)->set_platform_dma_ops) - ret = -EPROBE_DEFER; - mutex_unlock(&dev->iommu_group->mutex); - } if (ret) return ret; @@ -1727,26 +1717,29 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_group); -static int iommu_get_def_domain_type(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) - return IOMMU_DOMAIN_DMA; - - if (ops->def_domain_type) - return ops->def_domain_type(dev); - - return 0; -} - static struct iommu_domain * -__iommu_group_alloc_default_domain(const struct bus_type *bus, - struct iommu_group *group, int req_type) +__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { if (group->default_domain && group->default_domain->type == req_type) return group->default_domain; - return __iommu_domain_alloc(bus, req_type); + return __iommu_group_domain_alloc(group, req_type); +} + +/* + * Returns the iommu_ops for the devices in an iommu group. + * + * It is assumed that all devices in an iommu group are managed by a single + * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device + * in the group. + */ +static const struct iommu_ops *group_iommu_ops(struct iommu_group *group) +{ + struct group_device *device = + list_first_entry(&group->devices, struct group_device, list); + + lockdep_assert_held(&group->mutex); + + return dev_iommu_ops(device->dev); } /* @@ -1756,25 +1749,34 @@ __iommu_group_alloc_default_domain(const struct bus_type *bus, static struct iommu_domain * iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { - const struct bus_type *bus = - list_first_entry(&group->devices, struct group_device, list) - ->dev->bus; + const struct iommu_ops *ops = group_iommu_ops(group); struct iommu_domain *dom; lockdep_assert_held(&group->mutex); + /* + * Allow legacy drivers to specify the domain that will be the default + * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM + * domain. Do not use in new drivers. + */ + if (ops->default_domain) { + if (req_type) + return ERR_PTR(-EINVAL); + return ops->default_domain; + } + if (req_type) - return __iommu_group_alloc_default_domain(bus, group, req_type); + return __iommu_group_alloc_default_domain(group, req_type); /* The driver gave no guidance on what type to use, try the default */ - dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type); + dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); if (dom) return dom; /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) return NULL; - dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA); + dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); if (!dom) return NULL; @@ -1820,40 +1822,109 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -/* A target_type of 0 will select the best domain type and cannot fail */ +/* + * Combine the driver's chosen def_domain_type across all the devices in a + * group. Drivers must give a consistent result. + */ +static int iommu_get_def_domain_type(struct iommu_group *group, + struct device *dev, int cur_type) +{ + const struct iommu_ops *ops = group_iommu_ops(group); + int type; + + if (!ops->def_domain_type) + return cur_type; + + type = ops->def_domain_type(dev); + if (!type || cur_type == type) + return cur_type; + if (!cur_type) + return type; + + dev_err_ratelimited( + dev, + "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", + iommu_domain_type_str(cur_type), iommu_domain_type_str(type), + group->id); + + /* + * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY + * takes precedence. + */ + if (type == IOMMU_DOMAIN_IDENTITY) + return type; + return cur_type; +} + +/* + * A target_type of 0 will select the best domain type. 0 can be returned in + * this case meaning the global default should be used. + */ static int iommu_get_default_domain_type(struct iommu_group *group, int target_type) { - int best_type = target_type; + struct device *untrusted = NULL; struct group_device *gdev; - struct device *last_dev; + int driver_type = 0; lockdep_assert_held(&group->mutex); + /* + * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an + * identity_domain and it will automatically become their default + * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain. + * Override the selection to IDENTITY. + */ + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { + static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && + IS_ENABLED(CONFIG_IOMMU_DMA))); + driver_type = IOMMU_DOMAIN_IDENTITY; + } + for_each_group_device(group, gdev) { - unsigned int type = iommu_get_def_domain_type(gdev->dev); - - if (best_type && type && best_type != type) { - if (target_type) { - dev_err_ratelimited( - gdev->dev, - "Device cannot be in %s domain\n", - iommu_domain_type_str(target_type)); + driver_type = iommu_get_def_domain_type(group, gdev->dev, + driver_type); + + if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { + /* + * No ARM32 using systems will set untrusted, it cannot + * work. + */ + if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) return -1; - } + untrusted = gdev->dev; + } + } - dev_warn( - gdev->dev, - "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", - iommu_domain_type_str(type), dev_name(last_dev), - iommu_domain_type_str(best_type)); - return 0; + /* + * If the common dma ops are not selected in kconfig then we cannot use + * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been + * selected. + */ + if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { + if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) + return -1; + if (!driver_type) + driver_type = IOMMU_DOMAIN_IDENTITY; + } + + if (untrusted) { + if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { + dev_err_ratelimited( + untrusted, + "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", + group->id, iommu_domain_type_str(driver_type)); + return -1; } - if (!best_type) - best_type = type; - last_dev = gdev->dev; + driver_type = IOMMU_DOMAIN_DMA; } - return best_type; + + if (target_type) { + if (driver_type && target_type != driver_type) + return -1; + return target_type; + } + return driver_type; } static void iommu_group_do_probe_finalize(struct device *dev) @@ -1982,16 +2053,22 @@ void iommu_set_fault_handler(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); -static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, - unsigned type) +static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, + struct device *dev, + unsigned int type) { struct iommu_domain *domain; unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; - if (bus == NULL || bus->iommu_ops == NULL) + if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain) + return ops->identity_domain; + else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging) + domain = ops->domain_alloc_paging(dev); + else if (ops->domain_alloc) + domain = ops->domain_alloc(alloc_type); + else return NULL; - domain = bus->iommu_ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -2001,10 +2078,10 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, * may override this later */ if (!domain->pgsize_bitmap) - domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + domain->pgsize_bitmap = ops->pgsize_bitmap; if (!domain->ops) - domain->ops = bus->iommu_ops->default_domain_ops; + domain->ops = ops->default_domain_ops; if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); @@ -2013,9 +2090,22 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, return domain; } +static struct iommu_domain * +__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type) +{ + struct device *dev = + list_first_entry(&group->devices, struct group_device, list) + ->dev; + + return __iommu_domain_alloc(group_iommu_ops(group), dev, type); +} + struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { - return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED); + if (bus == NULL || bus->iommu_ops == NULL) + return NULL; + return __iommu_domain_alloc(bus->iommu_ops, NULL, + IOMMU_DOMAIN_UNMANAGED); } EXPORT_SYMBOL_GPL(iommu_domain_alloc); @@ -2024,7 +2114,8 @@ void iommu_domain_free(struct iommu_domain *domain) if (domain->type == IOMMU_DOMAIN_SVA) mmdrop(domain->mm); iommu_put_dma_cookie(domain); - domain->ops->free(domain); + if (domain->ops->free) + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); @@ -2287,21 +2378,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, if (group->domain == new_domain) return 0; - /* - * New drivers should support default domains, so set_platform_dma() - * op will never be called. Otherwise the NULL domain represents some - * platform specific behavior. - */ - if (!new_domain) { - for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - - if (!WARN_ON(!ops->set_platform_dma_ops)) - ops->set_platform_dma_ops(gdev->dev); - } - group->domain = NULL; - return 0; - } + if (WARN_ON(!new_domain)) + return -EINVAL; /* * Changing the domain is done by calling attach_dev() on the new @@ -2337,19 +2415,15 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, */ last_gdev = gdev; for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - /* - * If set_platform_dma_ops is not present a NULL domain can - * happen only for first probe, in which case we leave - * group->domain as NULL and let release clean everything up. + * A NULL domain can happen only for first probe, in which case + * we leave group->domain as NULL and let release clean + * everything up. */ if (group->domain) WARN_ON(__iommu_device_set_domain( group, gdev->dev, group->domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); - else if (ops->set_platform_dma_ops) - ops->set_platform_dma_ops(gdev->dev); if (gdev == last_gdev) break; } @@ -2971,21 +3045,9 @@ static int iommu_setup_default_domain(struct iommu_group *group, if (req_type < 0) return -EINVAL; - /* - * There are still some drivers which don't support default domains, so - * we ignore the failure and leave group->default_domain NULL. - * - * We assume that the iommu driver starts up the device in - * 'set_platform_dma_ops' mode if it does not support default domains. - */ dom = iommu_group_alloc_default_domain(group, req_type); - if (!dom) { - /* Once in default_domain mode we never leave */ - if (group->default_domain) - return -ENODEV; - group->default_domain = NULL; - return 0; - } + if (!dom) + return -ENODEV; if (group->default_domain == dom) return 0; @@ -3164,8 +3226,7 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); /* We may race against bus_iommu_probe() finalising groups here */ - if (IS_ENABLED(CONFIG_IOMMU_DMA) && !group->default_domain && - !dev_iommu_ops(dev)->set_platform_dma_ops) { + if (!group->default_domain) { ret = -EPROBE_DEFER; goto unlock_out; } @@ -3211,21 +3272,18 @@ void iommu_device_unuse_default_domain(struct device *dev) static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) { - struct group_device *dev = - list_first_entry(&group->devices, struct group_device, list); - if (group->blocking_domain) return 0; group->blocking_domain = - __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED); + __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); if (!group->blocking_domain) { /* * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED * create an empty domain instead. */ - group->blocking_domain = __iommu_domain_alloc( - dev->dev->bus, IOMMU_DOMAIN_UNMANAGED); + group->blocking_domain = __iommu_group_domain_alloc( + group, IOMMU_DOMAIN_UNMANAGED); if (!group->blocking_domain) return -EINVAL; } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 00b794d74e03b..aa56b67c44d48 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -126,10 +126,6 @@ struct selftest_obj { }; }; -static void mock_domain_blocking_free(struct iommu_domain *domain) -{ -} - static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { @@ -137,7 +133,6 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, } static const struct iommu_domain_ops mock_blocking_ops = { - .free = mock_domain_blocking_free, .attach_dev = mock_domain_nop_attach, }; @@ -301,14 +296,6 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return cap == IOMMU_CAP_CACHE_COHERENCY; } -static void mock_domain_set_plaform_dma_ops(struct device *dev) -{ - /* - * mock doesn't setup default domains because we can't hook into the - * normal probe path - */ -} - static struct iommu_device mock_iommu_device = { }; @@ -318,12 +305,16 @@ static struct iommu_device *mock_probe_device(struct device *dev) } static const struct iommu_ops mock_ops = { + /* + * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() + * because it is zero. + */ + .default_domain = &mock_blocking_domain, .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, .domain_alloc = mock_domain_alloc, .capable = mock_domain_capable, - .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, .probe_device = mock_probe_device, .default_domain_ops = diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 65ff69477c43e..eaabae7615776 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -295,6 +295,18 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, mmu->utlb_ctx[utlb] = domain->context_id; } +/* + * Disable MMU translation for the microTLB. + */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + ipmmu_imuctr_write(mmu, utlb, 0); + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; +} + static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; @@ -551,13 +563,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -627,6 +636,36 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return 0; } +static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct ipmmu_vmsa_domain *domain; + unsigned int i; + + if (io_domain == identity_domain || !io_domain) + return 0; + + domain = to_vmsa_domain(io_domain); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_disable(domain, fwspec->ids[i]); + + /* + * TODO: Optimize by disabling the context when no device is attached. + */ + return 0; +} + +static struct iommu_domain_ops ipmmu_iommu_identity_ops = { + .attach_dev = ipmmu_iommu_identity_attach, +}; + +static struct iommu_domain ipmmu_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &ipmmu_iommu_identity_ops, +}; + static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -849,7 +888,8 @@ static struct iommu_group *ipmmu_find_group(struct device *dev) } static const struct iommu_ops ipmmu_ops = { - .domain_alloc = ipmmu_domain_alloc, + .identity_domain = &ipmmu_iommu_identity_domain, + .domain_alloc_paging = ipmmu_domain_alloc_paging, .probe_device = ipmmu_probe_device, .release_device = ipmmu_release_device, .probe_finalize = ipmmu_probe_finalize, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 79d89bad5132b..a163cee0b7242 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -302,13 +302,10 @@ static void __program_context(void __iomem *base, int ctx, SET_M(base, ctx, 1); } -static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) +static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev) { struct msm_priv *priv; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto fail_nomem; @@ -443,15 +440,20 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) return ret; } -static void msm_iommu_set_platform_dma(struct device *dev) +static int msm_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct msm_priv *priv = to_msm_priv(domain); + struct msm_priv *priv; unsigned long flags; struct msm_iommu_dev *iommu; struct msm_iommu_ctx_dev *master; - int ret; + int ret = 0; + + if (domain == identity_domain || !domain) + return 0; + priv = to_msm_priv(domain); free_io_pgtable_ops(priv->iop); spin_lock_irqsave(&msm_iommu_lock, flags); @@ -468,8 +470,18 @@ static void msm_iommu_set_platform_dma(struct device *dev) } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; } +static struct iommu_domain_ops msm_iommu_identity_ops = { + .attach_dev = msm_iommu_identity_attach, +}; + +static struct iommu_domain msm_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &msm_iommu_identity_ops, +}; + static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -675,10 +687,10 @@ irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id) } static struct iommu_ops msm_iommu_ops = { - .domain_alloc = msm_iommu_domain_alloc, + .identity_domain = &msm_iommu_identity_domain, + .domain_alloc_paging = msm_iommu_domain_alloc_paging, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, - .set_platform_dma_ops = msm_iommu_set_platform_dma, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 06c0770ff894e..f7e2b9cd39a2a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -688,13 +688,10 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, return 0; } -static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev) { struct mtk_iommu_domain *dom; - if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -776,6 +773,28 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, return ret; } +static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + + if (domain == identity_domain || !domain) + return 0; + + mtk_iommu_config(data, dev, false, 0); + return 0; +} + +static struct iommu_domain_ops mtk_iommu_identity_ops = { + .attach_dev = mtk_iommu_identity_attach, +}; + +static struct iommu_domain mtk_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_identity_ops, +}; + static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -995,7 +1014,8 @@ static void mtk_iommu_get_resv_regions(struct device *dev, } static const struct iommu_ops mtk_iommu_ops = { - .domain_alloc = mtk_iommu_domain_alloc, + .identity_domain = &mtk_iommu_identity_domain, + .domain_alloc_paging = mtk_iommu_domain_alloc_paging, .probe_device = mtk_iommu_probe_device, .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index f1754efcfe74e..574c6e38cfb3d 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -270,13 +270,10 @@ static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data) return 0; } -static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type) +static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev) { struct mtk_iommu_v1_domain *dom; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -319,13 +316,24 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device return 0; } -static void mtk_iommu_v1_set_platform_dma(struct device *dev) +static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); mtk_iommu_v1_config(data, dev, false); + return 0; } +static struct iommu_domain_ops mtk_iommu_v1_identity_ops = { + .attach_dev = mtk_iommu_v1_identity_attach, +}; + +static struct iommu_domain mtk_iommu_v1_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &mtk_iommu_v1_identity_ops, +}; + static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -441,11 +449,6 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_arg return 0; } -static int mtk_iommu_v1_def_domain_type(struct device *dev) -{ - return IOMMU_DOMAIN_UNMANAGED; -} - static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -578,14 +581,13 @@ static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data) } static const struct iommu_ops mtk_iommu_v1_ops = { - .domain_alloc = mtk_iommu_v1_domain_alloc, + .identity_domain = &mtk_iommu_v1_identity_domain, + .domain_alloc_paging = mtk_iommu_v1_domain_alloc_paging, .probe_device = mtk_iommu_v1_probe_device, .probe_finalize = mtk_iommu_v1_probe_finalize, .release_device = mtk_iommu_v1_release_device, - .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, - .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 537e402f9bba9..fcf99bd195b32 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1555,23 +1555,35 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_domain->dev = NULL; } -static void omap_iommu_set_platform_dma(struct device *dev) +static int omap_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct omap_iommu_domain *omap_domain = to_omap_domain(domain); + struct omap_iommu_domain *omap_domain; + + if (domain == identity_domain || !domain) + return 0; + omap_domain = to_omap_domain(domain); spin_lock(&omap_domain->lock); _omap_iommu_detach_dev(omap_domain, dev); spin_unlock(&omap_domain->lock); + return 0; } -static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) +static struct iommu_domain_ops omap_iommu_identity_ops = { + .attach_dev = omap_iommu_identity_attach, +}; + +static struct iommu_domain omap_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &omap_iommu_identity_ops, +}; + +static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev) { struct omap_iommu_domain *omap_domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) return NULL; @@ -1732,11 +1744,11 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev) } static const struct iommu_ops omap_iommu_ops = { - .domain_alloc = omap_iommu_domain_alloc, + .identity_domain = &omap_iommu_identity_domain, + .domain_alloc_paging = omap_iommu_domain_alloc_paging, .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, - .set_platform_dma_ops = omap_iommu_set_platform_dma, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = omap_iommu_attach_dev, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 36fec26d2a04a..527a4395a1b40 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -989,13 +989,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, return 0; } -static void rk_iommu_identity_free(struct iommu_domain *domain) -{ -} - static struct iommu_domain_ops rk_identity_ops = { .attach_dev = rk_iommu_identity_attach, - .free = rk_iommu_identity_free, }; static struct iommu_domain rk_identity_domain = { @@ -1003,13 +998,6 @@ static struct iommu_domain rk_identity_domain = { .ops = &rk_identity_ops, }; -#ifdef CONFIG_ARM -static void rk_iommu_set_platform_dma(struct device *dev) -{ - WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev)); -} -#endif - static int rk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1055,16 +1043,10 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, return ret; } -static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) +static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) { struct rk_iommu_domain *rk_domain; - if (type == IOMMU_DOMAIN_IDENTITY) - return &rk_identity_domain; - - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) - return NULL; - if (!dma_dev) return NULL; @@ -1185,13 +1167,11 @@ static int rk_iommu_of_xlate(struct device *dev, } static const struct iommu_ops rk_iommu_ops = { - .domain_alloc = rk_iommu_domain_alloc, + .identity_domain = &rk_identity_domain, + .domain_alloc_paging = rk_iommu_domain_alloc_paging, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, .device_group = rk_iommu_device_group, -#ifdef CONFIG_ARM - .set_platform_dma_ops = rk_iommu_set_platform_dma, -#endif .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index fbf59a8db29b1..5695ad71d60e2 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -39,13 +39,10 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) } } -static struct iommu_domain *s390_domain_alloc(unsigned domain_type) +static struct iommu_domain *s390_domain_alloc_paging(struct device *dev) { struct s390_domain *s390_domain; - if (domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); if (!s390_domain) return NULL; @@ -142,14 +139,31 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void s390_iommu_set_platform_dma(struct device *dev) +/* + * Switch control over the IOMMU to S390's internal dma_api ops + */ +static int s390_iommu_platform_attach(struct iommu_domain *platform_domain, + struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); + if (!zdev->s390_domain) + return 0; + __s390_iommu_detach_device(zdev); zpci_dma_init_device(zdev); + return 0; } +static struct iommu_domain_ops s390_iommu_platform_ops = { + .attach_dev = s390_iommu_platform_attach, +}; + +static struct iommu_domain s390_iommu_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &s390_iommu_platform_ops, +}; + static void s390_iommu_get_resv_regions(struct device *dev, struct list_head *list) { @@ -428,12 +442,12 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) } static const struct iommu_ops s390_iommu_ops = { + .default_domain = &s390_iommu_platform_domain, .capable = s390_iommu_capable, - .domain_alloc = s390_domain_alloc, + .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, - .set_platform_dma_ops = s390_iommu_set_platform_dma, .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index c8e79a2d8b4c6..1a4bc7287de22 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -134,13 +134,10 @@ sprd_iommu_pgt_size(struct iommu_domain *domain) SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32); } -static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) +static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev) { struct sprd_iommu_domain *dom; - if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - dom = kzalloc(sizeof(*dom), GFP_KERNEL); if (!dom) return NULL; @@ -421,7 +418,7 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { - .domain_alloc = sprd_iommu_domain_alloc, + .domain_alloc_paging = sprd_iommu_domain_alloc_paging, .probe_device = sprd_iommu_probe_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 94bd7f25f6f26..85ce3dc0bdd59 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -668,14 +668,11 @@ static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain, sun50i_iova_get_page_offset(iova); } -static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) +static struct iommu_domain * +sun50i_iommu_domain_alloc_paging(struct device *dev) { struct sun50i_iommu_domain *sun50i_domain; - if (type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL); if (!sun50i_domain) return NULL; @@ -758,21 +755,32 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu, iommu->domain = NULL; } -static void sun50i_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { - struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = dev_iommu_priv_get(dev); + struct sun50i_iommu_domain *sun50i_domain; dev_dbg(dev, "Detaching from IOMMU domain\n"); - if (iommu->domain != domain) - return; + if (iommu->domain == identity_domain) + return 0; + sun50i_domain = to_sun50i_domain(iommu->domain); if (refcount_dec_and_test(&sun50i_domain->refcnt)) sun50i_iommu_detach_domain(iommu, sun50i_domain); + return 0; } +static struct iommu_domain_ops sun50i_iommu_identity_ops = { + .attach_dev = sun50i_iommu_identity_attach, +}; + +static struct iommu_domain sun50i_iommu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &sun50i_iommu_identity_ops, +}; + static int sun50i_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -790,8 +798,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain, if (iommu->domain == domain) return 0; - if (iommu->domain) - sun50i_iommu_detach_device(iommu->domain, dev); + sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev); sun50i_iommu_attach_domain(iommu, sun50i_domain); @@ -828,9 +835,10 @@ static int sun50i_iommu_of_xlate(struct device *dev, } static const struct iommu_ops sun50i_iommu_ops = { + .identity_domain = &sun50i_iommu_identity_domain, .pgsize_bitmap = SZ_4K, .device_group = sun50i_iommu_device_group, - .domain_alloc = sun50i_iommu_domain_alloc, + .domain_alloc_paging = sun50i_iommu_domain_alloc_paging, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, .default_domain_ops = &(const struct iommu_domain_ops) { @@ -986,6 +994,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; spin_lock_init(&iommu->iommu_lock); + iommu->domain = &sun50i_iommu_identity_domain; platform_set_drvdata(pdev, iommu); iommu->dev = &pdev->dev; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e445f80d02263..1764a63347b04 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -272,13 +272,10 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id) clear_bit(id, smmu->asids); } -static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) +static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) { struct tegra_smmu_as *as; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - as = kzalloc(sizeof(*as), GFP_KERNEL); if (!as) return NULL; @@ -511,23 +508,39 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain, return err; } -static void tegra_smmu_set_platform_dma(struct device *dev) +static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain, + struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct tegra_smmu_as *as = to_smmu_as(domain); - struct tegra_smmu *smmu = as->smmu; + struct tegra_smmu_as *as; + struct tegra_smmu *smmu; unsigned int index; if (!fwspec) - return; + return -ENODEV; + + if (domain == identity_domain || !domain) + return 0; + as = to_smmu_as(domain); + smmu = as->smmu; for (index = 0; index < fwspec->num_ids; index++) { tegra_smmu_disable(smmu, fwspec->ids[index], as->id); tegra_smmu_as_unprepare(smmu, as); } + return 0; } +static struct iommu_domain_ops tegra_smmu_identity_ops = { + .attach_dev = tegra_smmu_identity_attach, +}; + +static struct iommu_domain tegra_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &tegra_smmu_identity_ops, +}; + static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, u32 value) { @@ -962,11 +975,22 @@ static int tegra_smmu_of_xlate(struct device *dev, return iommu_fwspec_add_ids(dev, &id, 1); } +static int tegra_smmu_def_domain_type(struct device *dev) +{ + /* + * FIXME: For now we want to run all translation in IDENTITY mode, due + * to some device quirks. Better would be to just quirk the troubled + * devices. + */ + return IOMMU_DOMAIN_IDENTITY; +} + static const struct iommu_ops tegra_smmu_ops = { - .domain_alloc = tegra_smmu_domain_alloc, + .identity_domain = &tegra_smmu_identity_domain, + .def_domain_type = &tegra_smmu_def_domain_type, + .domain_alloc_paging = tegra_smmu_domain_alloc_paging, .probe_device = tegra_smmu_probe_device, .device_group = tegra_smmu_device_group, - .set_platform_dma_ops = tegra_smmu_set_platform_dma, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b6ef263e85c06..d87464a63ff49 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +82,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,6 +94,7 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) struct iommu_domain { unsigned type; @@ -235,13 +239,12 @@ struct iommu_iotlb_gather { * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. * @domain_alloc: allocate iommu domain + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -260,6 +263,11 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -267,11 +275,11 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ @@ -294,6 +302,8 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *default_domain; }; /**