diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c index 19dd70a84a..584998779f 100644 --- a/linuxkm/linuxkm_memory.c +++ b/linuxkm/linuxkm_memory.c @@ -71,557 +71,6 @@ void *lkm_realloc(void *ptr, size_t newsize) { } #endif /* HAVE_KVMALLOC */ -#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) - -/* kernel 4.19 -- the most recent LTS before 5.4 -- lacks the necessary safety - * checks in __kernel_fpu_begin(), and lacks TIF_NEED_FPU_LOAD. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) - #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS on x86 requires kernel 5.4.0 or higher. -#endif - -static unsigned int wc_linuxkm_fpu_states_n_tracked = 0; - -struct wc_thread_fpu_count_ent { - volatile pid_t pid; - unsigned int fpu_state; -}; -struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_states = NULL; - -#ifdef WOLFSSL_COMMERCIAL_LICENSE - -#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - #error WOLFSSL_COMMERCIAL_LICENSE requires LINUXKM_FPU_STATES_FOLLOW_THREADS -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#pragma GCC diagnostic ignored "-Wnested-externs" -/* avoid dependence on "alternatives_patched" and "xfd_validate_state()". */ -#undef CONFIG_X86_DEBUG_FPU -#include "../kernel/fpu/internal.h" -#include "../kernel/fpu/xstate.h" -#pragma GCC diagnostic pop - -static union wc_linuxkm_fpu_savebuf { - byte buf[1024]; /* must be 64-byte-aligned */ - struct fpstate fpstate; -} *wc_linuxkm_fpu_savebufs = NULL; - -#endif /* WOLFSSL_COMMERCIAL_LICENSE */ - -#define WC_FPU_COUNT_MASK 0x7fffffffU -#define WC_FPU_SAVED_MASK 0x80000000U - -WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) -{ - if (wc_linuxkm_fpu_states != NULL) { -#ifdef HAVE_FIPS - /* see note below in wc_linuxkm_fpu_state_assoc_unlikely(). */ - return 0; -#else - static int warned_for_repeat_alloc = 0; - if (! warned_for_repeat_alloc) { - pr_err("attempt at repeat allocation" - " in allocate_wolfcrypt_linuxkm_fpu_states\n"); - warned_for_repeat_alloc = 1; - } - return BAD_STATE_E; -#endif - } - -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - if (nr_cpu_ids >= 16) - wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids * 2; - else - wc_linuxkm_fpu_states_n_tracked = 32; -#else - wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids; -#endif - - wc_linuxkm_fpu_states = - (struct wc_thread_fpu_count_ent *)malloc( - wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); - - if (! wc_linuxkm_fpu_states) { - pr_err("allocation of %lu bytes for " - "wc_linuxkm_fpu_states failed.\n", - nr_cpu_ids * sizeof(struct fpu_state *)); - return MEMORY_E; - } - - memset(wc_linuxkm_fpu_states, 0, wc_linuxkm_fpu_states_n_tracked - * sizeof(wc_linuxkm_fpu_states[0])); - -#ifdef WOLFSSL_COMMERCIAL_LICENSE - wc_linuxkm_fpu_savebufs = (union wc_linuxkm_fpu_savebuf *)malloc( - wc_linuxkm_fpu_states_n_tracked * sizeof(*wc_linuxkm_fpu_savebufs)); - if (! 
wc_linuxkm_fpu_savebufs) { - pr_err("allocation of %lu bytes for " - "wc_linuxkm_fpu_savebufs failed.\n", - WC_LINUXKM_ROUND_UP_P_OF_2(wc_linuxkm_fpu_states_n_tracked) - * sizeof(*wc_linuxkm_fpu_savebufs)); - free(wc_linuxkm_fpu_states); - wc_linuxkm_fpu_states = NULL; - return MEMORY_E; - } - if ((uintptr_t)wc_linuxkm_fpu_savebufs - & (WC_LINUXKM_ROUND_UP_P_OF_2(sizeof(*wc_linuxkm_fpu_savebufs)) - 1)) - { - pr_err("allocation of %lu bytes for " - "wc_linuxkm_fpu_savebufs allocated with wrong alignment 0x%lx.\n", - WC_LINUXKM_ROUND_UP_P_OF_2(wc_linuxkm_fpu_states_n_tracked) - * sizeof(*wc_linuxkm_fpu_savebufs), - (uintptr_t)wc_linuxkm_fpu_savebufs); - free(wc_linuxkm_fpu_savebufs); - wc_linuxkm_fpu_savebufs = NULL; - free(wc_linuxkm_fpu_states); - wc_linuxkm_fpu_states = NULL; - return MEMORY_E; - } - -#endif - - return 0; -} - -void free_wolfcrypt_linuxkm_fpu_states(void) { - struct wc_thread_fpu_count_ent *i, *i_endptr; - pid_t i_pid; - - if (wc_linuxkm_fpu_states == NULL) { - pr_err("free_wolfcrypt_linuxkm_fpu_states called" - " before allocate_wolfcrypt_linuxkm_fpu_states.\n"); - return; - } - - for (i = wc_linuxkm_fpu_states, - i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked]; - i < i_endptr; - ++i) - { - i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME); - if (i_pid == 0) - continue; - if (i->fpu_state != 0) { - pr_err("free_wolfcrypt_linuxkm_fpu_states called" - " with nonzero state 0x%x for pid %d.\n", i->fpu_state, i_pid); - i->fpu_state = 0; - } - } - -#ifdef WOLFSSL_COMMERCIAL_LICENSE - free(wc_linuxkm_fpu_savebufs); - wc_linuxkm_fpu_savebufs = NULL; -#endif - free(wc_linuxkm_fpu_states); - wc_linuxkm_fpu_states = NULL; -} - -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS -/* legacy thread-local storage facility for tracking recursive fpu - * pushing/popping - */ -static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) { - struct wc_thread_fpu_count_ent *i, *i_endptr, *i_empty; - pid_t my_pid = task_pid_nr(current), i_pid; - - { - static int _warned_on_null = 0; - if (wc_linuxkm_fpu_states == NULL) - { -#ifdef HAVE_FIPS - /* FIPS needs to use SHA256 for the core verify HMAC, before - * reaching the regular wolfCrypt_Init() logic. to break the - * dependency loop on intelasm builds, we allocate here. - * this is not thread-safe and doesn't need to be. - */ - int ret = allocate_wolfcrypt_linuxkm_fpu_states(); - if (ret != 0) -#endif - { - if (_warned_on_null == 0) { - pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" - " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); - _warned_on_null = 1; - } - return NULL; - } - } - } - - i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked]; - - for (;;) { - for (i = wc_linuxkm_fpu_states, - i_empty = NULL; - i < i_endptr; - ++i) - { - i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME); - if (i_pid == my_pid) - return i; - if ((i_empty == NULL) && (i_pid == 0)) - i_empty = i; - } - if ((i_empty == NULL) || (! create_p)) - return NULL; - - i_pid = 0; - if (__atomic_compare_exchange_n( - &(i_empty->pid), - &i_pid, - my_pid, - 0 /* weak */, - __ATOMIC_SEQ_CST /* success_memmodel */, - __ATOMIC_SEQ_CST /* failure_memmodel */)) - { - return i_empty; - } - } -} - -#else /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */ - -/* lock-free O(1)-lookup CPU-local storage facility for tracking recursive fpu - * pushing/popping. - * - * caller must have already called kernel_fpu_begin() or preempt_disable() - * before entering this or the streamlined inline version of it below. 
- */ -static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int create_p) { - int my_cpu = raw_smp_processor_id(); - pid_t my_pid = task_pid_nr(current), slot_pid; - struct wc_thread_fpu_count_ent *slot; - - { - static int _warned_on_null = 0; - if (wc_linuxkm_fpu_states == NULL) - { -#ifdef HAVE_FIPS - /* FIPS needs to use SHA256 for the core verify HMAC, before - * reaching the regular wolfCrypt_Init() logic. to break the - * dependency loop on intelasm builds, we allocate here. - * this is not thread-safe and doesn't need to be. - */ - int ret = allocate_wolfcrypt_linuxkm_fpu_states(); - if (ret != 0) -#endif - { - if (_warned_on_null == 0) { - pr_err("wc_linuxkm_fpu_state_assoc called by pid %d" - " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid); - _warned_on_null = 1; - } - return NULL; - } - } - } - - slot = &wc_linuxkm_fpu_states[my_cpu]; - slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME); - if (slot_pid == my_pid) { - if (create_p) { - static int _warned_on_redundant_create_p = 0; - if (_warned_on_redundant_create_p < 10) { - pr_err("wc_linuxkm_fpu_state_assoc called with create_p=1 by" - " pid %d on cpu %d with cpu slot already reserved by" - " said pid.\n", my_pid, my_cpu); - ++_warned_on_redundant_create_p; - } - } - return slot; - } - if (create_p) { - if (slot_pid == 0) { - __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE); - return slot; - } else { - /* if the slot is already occupied, that can be benign due to a - * migration, but it will require fixup by the thread that owns the - * slot, which will happen when it releases its lock, or sooner (see - * below). - */ - static int _warned_on_mismatched_pid = 0; - if (_warned_on_mismatched_pid < 10) { - pr_warn("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d" - " but cpu slot already reserved by pid %d.\n", - my_pid, my_cpu, slot_pid); - ++_warned_on_mismatched_pid; - } - return NULL; - } - } else { - /* check for migration. this can happen despite our best efforts if any - * I/O occured while locked, e.g. kernel messages like "uninitialized - * urandom read". since we're locked now, we can safely migrate the - * entry in wc_linuxkm_fpu_states[], freeing up the slot on the previous - * cpu. - */ - unsigned int cpu_i; - for (cpu_i = 0; cpu_i < wc_linuxkm_fpu_states_n_tracked; ++cpu_i) { - if (__atomic_load_n( - &wc_linuxkm_fpu_states[cpu_i].pid, - __ATOMIC_CONSUME) - == my_pid) - { - wc_linuxkm_fpu_states[my_cpu] = wc_linuxkm_fpu_states[cpu_i]; - __atomic_store_n(&wc_linuxkm_fpu_states[cpu_i].fpu_state, 0, - __ATOMIC_RELEASE); - __atomic_store_n(&wc_linuxkm_fpu_states[cpu_i].pid, 0, - __ATOMIC_RELEASE); - return &wc_linuxkm_fpu_states[my_cpu]; - } - } - return NULL; - } -} - -static inline struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc( - int create_p) -{ - int my_cpu = raw_smp_processor_id(); /* my_cpu is only trustworthy if we're - * already nonpreemptible -- we'll - * determine that soon enough by - * checking if the pid matches or, - * failing that, if create_p. 
- */ - pid_t my_pid = task_pid_nr(current), slot_pid; - struct wc_thread_fpu_count_ent *slot; - - if (unlikely(wc_linuxkm_fpu_states == NULL)) - return wc_linuxkm_fpu_state_assoc_unlikely(create_p); - - slot = &wc_linuxkm_fpu_states[my_cpu]; - slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME); - if (slot_pid == my_pid) { - if (unlikely(create_p)) - return wc_linuxkm_fpu_state_assoc_unlikely(create_p); - else - return slot; - } - if (likely(create_p)) { - if (likely(slot_pid == 0)) { - __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE); - return slot; - } else { - return wc_linuxkm_fpu_state_assoc_unlikely(create_p); - } - } else { - return wc_linuxkm_fpu_state_assoc_unlikely(create_p); - } -} - -#endif /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */ - -#ifdef WOLFSSL_COMMERCIAL_LICENSE -static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state( - struct wc_thread_fpu_count_ent *state) -{ - size_t i = (size_t)(state - wc_linuxkm_fpu_states) / sizeof(*state); - return &wc_linuxkm_fpu_savebufs[i].fpstate; -} -#endif - -static void wc_linuxkm_fpu_state_release_unlikely( - struct wc_thread_fpu_count_ent *ent) -{ - if (ent->fpu_state != 0) { - static int warned_nonzero_fpu_state = 0; - if (! warned_nonzero_fpu_state) { - pr_err("wc_linuxkm_fpu_state_free for pid %d" - " with nonzero fpu_state 0x%x.\n", ent->pid, ent->fpu_state); - warned_nonzero_fpu_state = 1; - } - ent->fpu_state = 0; - } - __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); -} - -static inline void wc_linuxkm_fpu_state_release( - struct wc_thread_fpu_count_ent *ent) -{ - if (unlikely(ent->fpu_state != 0)) - return wc_linuxkm_fpu_state_release_unlikely(ent); - __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); -} - -WARN_UNUSED_RESULT int can_save_vector_registers_x86(void) -{ - if (irq_fpu_usable()) - return 1; - else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) - return 0; - else if (test_thread_flag(TIF_NEED_FPU_LOAD)) - return 1; - return 0; -} - -WARN_UNUSED_RESULT int save_vector_registers_x86(void) -{ -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(1); -#else - struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0); -#endif - - /* allow for nested calls */ -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - if (pstate == NULL) - return MEMORY_E; -#endif - if ( -#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - (pstate != NULL) && -#endif - (pstate->fpu_state != 0U)) - { - if (unlikely((pstate->fpu_state & WC_FPU_COUNT_MASK) - == WC_FPU_COUNT_MASK)) - { - pr_err("save_vector_registers_x86 recursion register overflow for " - "pid %d.\n", pstate->pid); - return BAD_STATE_E; - } else { - ++pstate->fpu_state; - return 0; - } - } - - if (irq_fpu_usable() -#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)) - /* work around a kernel bug -- see linux commit 59f5ede3bc0f0. - * what we really want here is this_cpu_read(in_kernel_fpu), but - * in_kernel_fpu is an unexported static array. - */ - && !test_thread_flag(TIF_NEED_FPU_LOAD) -#endif - ) - { -#ifdef WOLFSSL_COMMERCIAL_LICENSE - struct fpstate *fpstate = wc_linuxkm_fpstate_buf_from_fpu_state(pstate); - fpregs_lock(); - fpstate->xfeatures = ~0UL; - os_xsave(fpstate); -#else /* !WOLFSSL_COMMERCIAL_LICENSE */ - #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) - /* inhibit migration, which gums up the algorithm in - * kernel_fpu_{begin,end}(). 
- */ - migrate_disable(); - #endif - kernel_fpu_begin(); - -#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - pstate = wc_linuxkm_fpu_state_assoc(1); - if (pstate == NULL) { - kernel_fpu_end(); - #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ - !defined(WOLFSSL_COMMERCIAL_LICENSE) - migrate_enable(); - #endif - return BAD_STATE_E; - } -#endif - -#endif /* !WOLFSSL_COMMERCIAL_LICENSE */ - /* set msb to 0 to trigger kernel_fpu_end() at cleanup. */ - pstate->fpu_state = 1U; - } else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) { - static int warned_fpu_forbidden = 0; - if (! warned_fpu_forbidden) - pr_err("save_vector_registers_x86 called from IRQ handler.\n"); -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - wc_linuxkm_fpu_state_release(pstate); -#endif - return BAD_STATE_E; - } else if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - static int warned_fpu_forbidden = 0; - if (! warned_fpu_forbidden) - pr_err("save_vector_registers_x86 called with !irq_fpu_usable from" - " thread without previous FPU save.\n"); -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - wc_linuxkm_fpu_state_release(pstate); -#endif - return BAD_STATE_E; - } else { - /* assume already safely in_kernel_fpu from caller, but recursively - * preempt_disable() to be extra-safe. - */ - preempt_disable(); -#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ - !defined(WOLFSSL_COMMERCIAL_LICENSE) - migrate_disable(); -#endif -#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - pstate = wc_linuxkm_fpu_state_assoc(1); - if (pstate == NULL) { - #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ - !defined(WOLFSSL_COMMERCIAL_LICENSE) - migrate_enable(); - #endif - preempt_enable(); - return BAD_STATE_E; - } -#endif - /* set msb to 1 to inhibit kernel_fpu_end() at cleanup. */ - pstate->fpu_state = - WC_FPU_SAVED_MASK + 1U; - } - - return 0; -} - -void restore_vector_registers_x86(void) -{ - struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0); - if (unlikely(pstate == NULL)) { - pr_err("restore_vector_registers_x86 called by pid %d on CPU %d " - "with no saved state.\n", task_pid_nr(current), - raw_smp_processor_id()); - return; - } - - if ((--pstate->fpu_state & WC_FPU_COUNT_MASK) > 0U) { - return; - } - - if (pstate->fpu_state == 0U) { -#ifdef WOLFSSL_COMMERCIAL_LICENSE - struct fpstate *fpstate = wc_linuxkm_fpstate_buf_from_fpu_state(pstate); - os_xrstor(fpstate, fpstate->xfeatures); - fpregs_unlock(); -#else - #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - wc_linuxkm_fpu_state_release(pstate); - #endif - kernel_fpu_end(); -#endif - } else { - pstate->fpu_state = 0U; - #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS - wc_linuxkm_fpu_state_release(pstate); - #endif - preempt_enable(); - } -#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ - !defined(WOLFSSL_COMMERCIAL_LICENSE) - migrate_enable(); -#endif - -#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS - wc_linuxkm_fpu_state_release(pstate); -#endif - - return; -} -#endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS && CONFIG_X86 */ - #if defined(__PIE__) && (LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0)) /* needed in 6.1+ because show_free_areas() static definition in mm.h calls * __show_free_areas(), which isn't exported (neither was show_free_areas()). 
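[The removals above relocate the x86 FPU-state glue out of linuxkm_memory.c; it reappears in the new linuxkm/x86_vector_register_glue.c at the end of this patch.] Callers never invoke save_vector_registers_x86()/restore_vector_registers_x86() directly; they go through the SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS() macros bound in linuxkm_wc_port.h (next hunk). A minimal sketch of the call-site contract, assuming the x86 bindings from this patch; crypt_op_with_simd() and do_simd_work() are hypothetical stand-ins, not part of the patch:

    /* Hypothetical call site: SAVE_VECTOR_REGISTERS() expands to a call to
     * save_vector_registers_x86() and runs its fail_clause when kernel FPU
     * context can't be claimed; each successful save must be balanced by
     * exactly one RESTORE_VECTOR_REGISTERS(), which pops one level of the
     * recursion count kept in fpu_state.
     */
    static int crypt_op_with_simd(void)
    {
        SAVE_VECTOR_REGISTERS(return BAD_STATE_E;);
        do_simd_work();             /* vector registers usable here */
        RESTORE_VECTOR_REGISTERS();
        return 0;
    }
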
diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h
index 12dfbde266..1623c3c4b1 100644
--- a/linuxkm/linuxkm_wc_port.h
+++ b/linuxkm/linuxkm_wc_port.h
@@ -333,6 +333,13 @@
     #if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && \
         defined(CONFIG_X86)
+
+        extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void);
+        extern void free_wolfcrypt_linuxkm_fpu_states(void);
+        extern __must_check int can_save_vector_registers_x86(void);
+        extern __must_check int save_vector_registers_x86(void);
+        extern void restore_vector_registers_x86(void);
+
         #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
             #include <asm/i387.h>
         #else
@@ -368,8 +375,30 @@
         #ifndef RESTORE_VECTOR_REGISTERS
             #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_x86()
         #endif
+
     #elif defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+
+        #error kernel module ARM SIMD is not yet tested or usable.
+        #include <asm/fpsimd.h>
+
+        static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void)
+        {
+            preempt_disable();
+            if (! may_use_simd()) {
+                preempt_enable();
+                return BAD_STATE_E;
+            } else {
+                fpsimd_preserve_current_state();
+                return 0;
+            }
+        }
+        static inline void restore_vector_registers_arm(void)
+        {
+            fpsimd_restore_current_state();
+            preempt_enable();
+        }
+
         #ifndef SAVE_VECTOR_REGISTERS
             #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } }
         #endif
@@ -382,9 +411,10 @@
         #ifndef RESTORE_VECTOR_REGISTERS
             #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm()
         #endif
+
     #elif defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS)
         #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture.
-    #endif
+    #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */
 
     _Pragma("GCC diagnostic pop");
 
@@ -529,39 +559,15 @@
     #endif
 
     struct task_struct *(*get_current)(void);
-    int (*preempt_count)(void);
 
 #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS
-    #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0)
-    typeof(cpu_number) *cpu_number;
-    #else
-    typeof(pcpu_hot) *pcpu_hot;
-    #endif
-    typeof(nr_cpu_ids) *nr_cpu_ids;
-
-    #if defined(CONFIG_SMP) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && !defined(WOLFSSL_COMMERCIAL_LICENSE)
-    /* note the current and needed version of these were added in af449901b8 (2020-Sep-17) */
-    typeof(migrate_disable) *migrate_disable;
-    typeof(migrate_enable) *migrate_enable;
-    #endif
-
-    #ifdef CONFIG_X86
-    typeof(irq_fpu_usable) *irq_fpu_usable;
-    #ifdef WOLFSSL_COMMERCIAL_LICENSE
-    typeof(fpregs_lock) *fpregs_lock;
-    typeof(fpregs_lock) *fpregs_unlock;
-    #else /* !WOLFSSL_COMMERCIAL_LICENSE */
-    /* kernel_fpu_begin() replaced by kernel_fpu_begin_mask() in commit e4512289,
-     * released in kernel 5.11, backported to 5.4.93
-     */
-    #ifdef kernel_fpu_begin
-    typeof(kernel_fpu_begin_mask) *kernel_fpu_begin_mask;
-    #else
-    typeof(kernel_fpu_begin) *kernel_fpu_begin;
-    #endif
-    typeof(kernel_fpu_end) *kernel_fpu_end;
-    #endif /* !defined(WOLFSSL_COMMERCIAL_LICENSE) */
+    typeof(allocate_wolfcrypt_linuxkm_fpu_states) *allocate_wolfcrypt_linuxkm_fpu_states;
+    typeof(can_save_vector_registers_x86) *can_save_vector_registers_x86;
+    typeof(free_wolfcrypt_linuxkm_fpu_states) *free_wolfcrypt_linuxkm_fpu_states;
+    typeof(restore_vector_registers_x86) *restore_vector_registers_x86;
+    typeof(save_vector_registers_x86) *save_vector_registers_x86;
 #else /* !CONFIG_X86 */
     #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture.
#endif /* arch */ @@ -697,38 +703,15 @@ #undef get_current #define get_current (wolfssl_linuxkm_get_pie_redirect_table()->get_current) - #undef preempt_count - #define preempt_count (wolfssl_linuxkm_get_pie_redirect_table()->preempt_count) - #ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS - #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) - #define cpu_number (*(wolfssl_linuxkm_get_pie_redirect_table()->cpu_number)) - #else - #define pcpu_hot (*(wolfssl_linuxkm_get_pie_redirect_table()->pcpu_hot)) - #endif - #define nr_cpu_ids (*(wolfssl_linuxkm_get_pie_redirect_table()->nr_cpu_ids)) - - #if defined(CONFIG_SMP) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && !defined(WOLFSSL_COMMERCIAL_LICENSE) - #define migrate_disable (*(wolfssl_linuxkm_get_pie_redirect_table()->migrate_disable)) - #define migrate_enable (*(wolfssl_linuxkm_get_pie_redirect_table()->migrate_enable)) - #endif - - #ifdef CONFIG_X86 - #define irq_fpu_usable (wolfssl_linuxkm_get_pie_redirect_table()->irq_fpu_usable) - #ifdef WOLFSSL_COMMERCIAL_LICENSE - #define fpregs_lock() (wolfssl_linuxkm_get_pie_redirect_table()->fpregs_lock()) - #define fpregs_unlock() (wolfssl_linuxkm_get_pie_redirect_table()->fpregs_unlock()) - #else /* !defined(WOLFSSL_COMMERCIAL_LICENSE) */ - #ifdef kernel_fpu_begin - #define kernel_fpu_begin_mask (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin_mask) - #else - #define kernel_fpu_begin (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_begin) - #endif - #define kernel_fpu_end (wolfssl_linuxkm_get_pie_redirect_table()->kernel_fpu_end) - #endif /* !defined(WOLFSSL_COMMERCIAL_LICENSE) */ - #else /* !CONFIG_X86 */ - #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture. - #endif /* archs */ + #if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) + #define allocate_wolfcrypt_linuxkm_fpu_states (wolfssl_linuxkm_get_pie_redirect_table()->allocate_wolfcrypt_linuxkm_fpu_states) + #define can_save_vector_registers_x86 (wolfssl_linuxkm_get_pie_redirect_table()->can_save_vector_registers_x86) + #define free_wolfcrypt_linuxkm_fpu_states (wolfssl_linuxkm_get_pie_redirect_table()->free_wolfcrypt_linuxkm_fpu_states) + #define restore_vector_registers_x86 (wolfssl_linuxkm_get_pie_redirect_table()->restore_vector_registers_x86) + #define save_vector_registers_x86 (wolfssl_linuxkm_get_pie_redirect_table()->save_vector_registers_x86) + #elif defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) + #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture. #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ #define __mutex_init (wolfssl_linuxkm_get_pie_redirect_table()->__mutex_init) @@ -762,41 +745,6 @@ #endif /* USE_WOLFSSL_LINUXKM_PIE_REDIRECT_TABLE */ -#ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS - -#ifdef CONFIG_X86 - - extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void); - extern void free_wolfcrypt_linuxkm_fpu_states(void); - extern __must_check int can_save_vector_registers_x86(void); - extern __must_check int save_vector_registers_x86(void); - extern void restore_vector_registers_x86(void); - -#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) - - #error kernel module ARM SIMD is not yet tested or usable. - - static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void) - { - preempt_disable(); - if (! 
may_use_simd()) {
-            preempt_enable();
-            return BAD_STATE_E;
-        } else {
-            fpsimd_preserve_current_state();
-            return 0;
-        }
-    }
-    static inline void restore_vector_registers_arm(void)
-    {
-        fpsimd_restore_current_state();
-        preempt_enable();
-    }
-
-#endif
-
-#endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */
-
 /* remove this multifariously conflicting macro, picked up from
  * Linux arch/<arch>/include/asm/current.h.
  */
diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c
index 4e474227ce..c38d7d8665 100644
--- a/linuxkm/lkcapi_glue.c
+++ b/linuxkm/lkcapi_glue.c
@@ -20,6 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
+/* included by linuxkm/module_hooks.c */
+
 #ifndef LINUXKM_LKCAPI_REGISTER
     #error lkcapi_glue.c included in non-LINUXKM_LKCAPI_REGISTER project.
 #endif
diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c
index 74086513b8..a04c5d3ab3 100644
--- a/linuxkm/module_hooks.c
+++ b/linuxkm/module_hooks.c
@@ -128,6 +128,10 @@ extern int wolfcrypt_benchmark_main(int argc, char** argv);
     #include "linuxkm/lkcapi_glue.c"
 #endif
 
+#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+    #include "linuxkm/x86_vector_register_glue.c"
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
 static int __init wolfssl_init(void)
 #else
@@ -379,11 +383,6 @@ static struct task_struct *my_get_current_thread(void) {
     return get_current();
 }
 
-/* ditto for preempt_count(). */
-static int my_preempt_count(void) {
-    return preempt_count();
-}
-
 #if defined(WOLFSSL_LINUXKM_SIMD_X86) && defined(WOLFSSL_COMMERCIAL_LICENSE)
 
 /* ditto for fpregs_lock/fpregs_unlock */
@@ -492,41 +491,15 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) {
 #endif
 
     wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread;
-    wolfssl_linuxkm_pie_redirect_table.preempt_count = my_preempt_count;
-
-#ifdef WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS
-
-    #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0)
-    wolfssl_linuxkm_pie_redirect_table.cpu_number = &cpu_number;
-    #else
-    wolfssl_linuxkm_pie_redirect_table.pcpu_hot = &pcpu_hot;
-    #endif
-    wolfssl_linuxkm_pie_redirect_table.nr_cpu_ids = &nr_cpu_ids;
-
-    #if defined(CONFIG_SMP) && \
-        (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \
-        !defined(WOLFSSL_COMMERCIAL_LICENSE)
-    wolfssl_linuxkm_pie_redirect_table.migrate_disable = &migrate_disable;
-    wolfssl_linuxkm_pie_redirect_table.migrate_enable = &migrate_enable;
-    #endif
-
-#ifdef WOLFSSL_LINUXKM_SIMD_X86
-    wolfssl_linuxkm_pie_redirect_table.irq_fpu_usable = irq_fpu_usable;
-    #ifdef WOLFSSL_COMMERCIAL_LICENSE
-    wolfssl_linuxkm_pie_redirect_table.fpregs_lock = my_fpregs_lock;
-    wolfssl_linuxkm_pie_redirect_table.fpregs_unlock = my_fpregs_unlock;
-    #else /* !defined(WOLFSSL_COMMERCIAL_LICENSE) */
-    #ifdef kernel_fpu_begin
-    wolfssl_linuxkm_pie_redirect_table.kernel_fpu_begin_mask =
-        kernel_fpu_begin_mask;
-    #else
-    wolfssl_linuxkm_pie_redirect_table.kernel_fpu_begin =
-        kernel_fpu_begin;
-    #endif
-    wolfssl_linuxkm_pie_redirect_table.kernel_fpu_end = kernel_fpu_end;
-    #endif /* !defined(WOLFSSL_COMMERCIAL_LICENSE) */
-#endif /* WOLFSSL_LINUXKM_SIMD_X86 */
+#if defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+    wolfssl_linuxkm_pie_redirect_table.allocate_wolfcrypt_linuxkm_fpu_states = allocate_wolfcrypt_linuxkm_fpu_states;
+    wolfssl_linuxkm_pie_redirect_table.can_save_vector_registers_x86 = can_save_vector_registers_x86;
+    wolfssl_linuxkm_pie_redirect_table.free_wolfcrypt_linuxkm_fpu_states = 
free_wolfcrypt_linuxkm_fpu_states; + wolfssl_linuxkm_pie_redirect_table.restore_vector_registers_x86 = restore_vector_registers_x86; + wolfssl_linuxkm_pie_redirect_table.save_vector_registers_x86 = save_vector_registers_x86; +#elif defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) + #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS is set for an unsupported architecture. #endif /* WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS */ wolfssl_linuxkm_pie_redirect_table.__mutex_init = __mutex_init; diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c new file mode 100644 index 0000000000..ea84201c38 --- /dev/null +++ b/linuxkm/x86_vector_register_glue.c @@ -0,0 +1,575 @@ +/* x86_vector_register_glue.c -- glue logic to save and restore vector registers + * on x86 + * + * Copyright (C) 2006-2024 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* included by linuxkm/module_hooks.c */ + +#if !defined(WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS) || !defined(CONFIG_X86) + #error x86_vector_register_glue.c included in non-vectorized/non-x86 project. +#endif + +/* kernel 4.19 -- the most recent LTS before 5.4 -- lacks the necessary safety + * checks in __kernel_fpu_begin(), and lacks TIF_NEED_FPU_LOAD. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) + #error WOLFSSL_LINUXKM_USE_SAVE_VECTOR_REGISTERS on x86 requires kernel 5.4.0 or higher. +#endif + +static unsigned int wc_linuxkm_fpu_states_n_tracked = 0; + +struct wc_thread_fpu_count_ent { + volatile pid_t pid; + unsigned int fpu_state; +}; +struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_states = NULL; + +#ifdef WOLFSSL_COMMERCIAL_LICENSE + +#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + #error WOLFSSL_COMMERCIAL_LICENSE requires LINUXKM_FPU_STATES_FOLLOW_THREADS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wnested-externs" +/* avoid dependence on "alternatives_patched" and "xfd_validate_state()". */ +#undef CONFIG_X86_DEBUG_FPU +#include "../kernel/fpu/internal.h" +#include "../kernel/fpu/xstate.h" +#pragma GCC diagnostic pop + +static union wc_linuxkm_fpu_savebuf { + byte buf[1024]; /* must be 64-byte-aligned */ + struct fpstate fpstate; +} *wc_linuxkm_fpu_savebufs = NULL; + +#endif /* WOLFSSL_COMMERCIAL_LICENSE */ + +#define WC_FPU_COUNT_MASK 0x7fffffffU +#define WC_FPU_SAVED_MASK 0x80000000U + +WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) +{ + if (wc_linuxkm_fpu_states != NULL) { +#ifdef HAVE_FIPS + /* see note below in wc_linuxkm_fpu_state_assoc_unlikely(). */ + return 0; +#else + static int warned_for_repeat_alloc = 0; + if (! 
warned_for_repeat_alloc) { + pr_err("attempt at repeat allocation" + " in allocate_wolfcrypt_linuxkm_fpu_states\n"); + warned_for_repeat_alloc = 1; + } + return BAD_STATE_E; +#endif + } + +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + if (nr_cpu_ids >= 16) + wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids * 2; + else + wc_linuxkm_fpu_states_n_tracked = 32; +#else + wc_linuxkm_fpu_states_n_tracked = nr_cpu_ids; +#endif + + wc_linuxkm_fpu_states = + (struct wc_thread_fpu_count_ent *)malloc( + wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); + + if (! wc_linuxkm_fpu_states) { + pr_err("allocation of %lu bytes for " + "wc_linuxkm_fpu_states failed.\n", + nr_cpu_ids * sizeof(struct fpu_state *)); + return MEMORY_E; + } + + memset(wc_linuxkm_fpu_states, 0, wc_linuxkm_fpu_states_n_tracked + * sizeof(wc_linuxkm_fpu_states[0])); + +#ifdef WOLFSSL_COMMERCIAL_LICENSE + wc_linuxkm_fpu_savebufs = (union wc_linuxkm_fpu_savebuf *)malloc( + wc_linuxkm_fpu_states_n_tracked * sizeof(*wc_linuxkm_fpu_savebufs)); + if (! wc_linuxkm_fpu_savebufs) { + pr_err("allocation of %lu bytes for " + "wc_linuxkm_fpu_savebufs failed.\n", + WC_LINUXKM_ROUND_UP_P_OF_2(wc_linuxkm_fpu_states_n_tracked) + * sizeof(*wc_linuxkm_fpu_savebufs)); + free(wc_linuxkm_fpu_states); + wc_linuxkm_fpu_states = NULL; + return MEMORY_E; + } + if ((uintptr_t)wc_linuxkm_fpu_savebufs + & (WC_LINUXKM_ROUND_UP_P_OF_2(sizeof(*wc_linuxkm_fpu_savebufs)) - 1)) + { + pr_err("allocation of %lu bytes for " + "wc_linuxkm_fpu_savebufs allocated with wrong alignment 0x%lx.\n", + WC_LINUXKM_ROUND_UP_P_OF_2(wc_linuxkm_fpu_states_n_tracked) + * sizeof(*wc_linuxkm_fpu_savebufs), + (uintptr_t)wc_linuxkm_fpu_savebufs); + free(wc_linuxkm_fpu_savebufs); + wc_linuxkm_fpu_savebufs = NULL; + free(wc_linuxkm_fpu_states); + wc_linuxkm_fpu_states = NULL; + return MEMORY_E; + } + +#endif + + return 0; +} + +void free_wolfcrypt_linuxkm_fpu_states(void) { + struct wc_thread_fpu_count_ent *i, *i_endptr; + pid_t i_pid; + + if (wc_linuxkm_fpu_states == NULL) { + pr_err("free_wolfcrypt_linuxkm_fpu_states called" + " before allocate_wolfcrypt_linuxkm_fpu_states.\n"); + return; + } + + for (i = wc_linuxkm_fpu_states, + i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked]; + i < i_endptr; + ++i) + { + i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME); + if (i_pid == 0) + continue; + if (i->fpu_state != 0) { + pr_err("free_wolfcrypt_linuxkm_fpu_states called" + " with nonzero state 0x%x for pid %d.\n", i->fpu_state, i_pid); + i->fpu_state = 0; + } + } + +#ifdef WOLFSSL_COMMERCIAL_LICENSE + free(wc_linuxkm_fpu_savebufs); + wc_linuxkm_fpu_savebufs = NULL; +#endif + free(wc_linuxkm_fpu_states); + wc_linuxkm_fpu_states = NULL; +} + +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS +/* legacy thread-local storage facility for tracking recursive fpu + * pushing/popping + */ +static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) { + struct wc_thread_fpu_count_ent *i, *i_endptr, *i_empty; + pid_t my_pid = task_pid_nr(current), i_pid; + + { + static int _warned_on_null = 0; + if (wc_linuxkm_fpu_states == NULL) + { +#ifdef HAVE_FIPS + /* FIPS needs to use SHA256 for the core verify HMAC, before + * reaching the regular wolfCrypt_Init() logic. to break the + * dependency loop on intelasm builds, we allocate here. + * this is not thread-safe and doesn't need to be. 
+             */
+            int ret = allocate_wolfcrypt_linuxkm_fpu_states();
+            if (ret != 0)
+#endif
+            {
+                if (_warned_on_null == 0) {
+                    pr_err("wc_linuxkm_fpu_state_assoc called by pid %d"
+                           " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid);
+                    _warned_on_null = 1;
+                }
+                return NULL;
+            }
+        }
+    }
+
+    i_endptr = &wc_linuxkm_fpu_states[wc_linuxkm_fpu_states_n_tracked];
+
+    for (;;) {
+        for (i = wc_linuxkm_fpu_states,
+                 i_empty = NULL;
+             i < i_endptr;
+             ++i)
+        {
+            i_pid = __atomic_load_n(&i->pid, __ATOMIC_CONSUME);
+            if (i_pid == my_pid)
+                return i;
+            if ((i_empty == NULL) && (i_pid == 0))
+                i_empty = i;
+        }
+        if ((i_empty == NULL) || (! create_p))
+            return NULL;
+
+        i_pid = 0;
+        if (__atomic_compare_exchange_n(
+                &(i_empty->pid),
+                &i_pid,
+                my_pid,
+                0 /* weak */,
+                __ATOMIC_SEQ_CST /* success_memmodel */,
+                __ATOMIC_SEQ_CST /* failure_memmodel */))
+        {
+            return i_empty;
+        }
+    }
+}
+
+#else /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
+
+/* lock-free O(1)-lookup CPU-local storage facility for tracking recursive fpu
+ * pushing/popping.
+ *
+ * caller must have already called kernel_fpu_begin() or preempt_disable()
+ * before entering this or the streamlined inline version of it below.
+ */
+static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc_unlikely(int create_p) {
+    int my_cpu = raw_smp_processor_id();
+    pid_t my_pid = task_pid_nr(current), slot_pid;
+    struct wc_thread_fpu_count_ent *slot;
+
+    {
+        static int _warned_on_null = 0;
+        if (wc_linuxkm_fpu_states == NULL)
+        {
+#ifdef HAVE_FIPS
+            /* FIPS needs to use SHA256 for the core verify HMAC, before
+             * reaching the regular wolfCrypt_Init() logic. to break the
+             * dependency loop on intelasm builds, we allocate here.
+             * this is not thread-safe and doesn't need to be.
+             */
+            int ret = allocate_wolfcrypt_linuxkm_fpu_states();
+            if (ret != 0)
+#endif
+            {
+                if (_warned_on_null == 0) {
+                    pr_err("wc_linuxkm_fpu_state_assoc called by pid %d"
+                           " before allocate_wolfcrypt_linuxkm_fpu_states.\n", my_pid);
+                    _warned_on_null = 1;
+                }
+                return NULL;
+            }
+        }
+    }
+
+    slot = &wc_linuxkm_fpu_states[my_cpu];
+    slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
+    if (slot_pid == my_pid) {
+        if (create_p) {
+            static int _warned_on_redundant_create_p = 0;
+            if (_warned_on_redundant_create_p < 10) {
+                pr_err("wc_linuxkm_fpu_state_assoc called with create_p=1 by"
+                       " pid %d on cpu %d with cpu slot already reserved by"
+                       " said pid.\n", my_pid, my_cpu);
+                ++_warned_on_redundant_create_p;
+            }
+        }
+        return slot;
+    }
+    if (create_p) {
+        if (slot_pid == 0) {
+            __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
+            return slot;
+        } else {
+            /* if the slot is already occupied, that can be benign due to a
+             * migration, but it will require fixup by the thread that owns the
+             * slot, which will happen when it releases its lock, or sooner (see
+             * below).
+             */
+            static int _warned_on_mismatched_pid = 0;
+            if (_warned_on_mismatched_pid < 10) {
+                pr_warn("wc_linuxkm_fpu_state_assoc called by pid %d on cpu %d"
+                        " but cpu slot already reserved by pid %d.\n",
+                        my_pid, my_cpu, slot_pid);
+                ++_warned_on_mismatched_pid;
+            }
+            return NULL;
+        }
+    } else {
+        /* check for migration. this can happen despite our best efforts if any
+         * I/O occurred while locked, e.g. kernel messages like "uninitialized
+         * urandom read". since we're locked now, we can safely migrate the
+         * entry in wc_linuxkm_fpu_states[], freeing up the slot on the previous
+         * cpu.
+         */
+        unsigned int cpu_i;
+        for (cpu_i = 0; cpu_i < wc_linuxkm_fpu_states_n_tracked; ++cpu_i) {
+            if (__atomic_load_n(
+                    &wc_linuxkm_fpu_states[cpu_i].pid,
+                    __ATOMIC_CONSUME)
+                == my_pid)
+            {
+                wc_linuxkm_fpu_states[my_cpu] = wc_linuxkm_fpu_states[cpu_i];
+                __atomic_store_n(&wc_linuxkm_fpu_states[cpu_i].fpu_state, 0,
+                                 __ATOMIC_RELEASE);
+                __atomic_store_n(&wc_linuxkm_fpu_states[cpu_i].pid, 0,
+                                 __ATOMIC_RELEASE);
+                return &wc_linuxkm_fpu_states[my_cpu];
+            }
+        }
+        return NULL;
+    }
+}
+
+static inline struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(
+    int create_p)
+{
+    int my_cpu = raw_smp_processor_id(); /* my_cpu is only trustworthy if we're
+                                          * already nonpreemptible -- we'll
+                                          * determine that soon enough by
+                                          * checking if the pid matches or,
+                                          * failing that, if create_p.
+                                          */
+    pid_t my_pid = task_pid_nr(current), slot_pid;
+    struct wc_thread_fpu_count_ent *slot;
+
+    if (unlikely(wc_linuxkm_fpu_states == NULL))
+        return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
+
+    slot = &wc_linuxkm_fpu_states[my_cpu];
+    slot_pid = __atomic_load_n(&slot->pid, __ATOMIC_CONSUME);
+    if (slot_pid == my_pid) {
+        if (unlikely(create_p))
+            return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
+        else
+            return slot;
+    }
+    if (likely(create_p)) {
+        if (likely(slot_pid == 0)) {
+            __atomic_store_n(&slot->pid, my_pid, __ATOMIC_RELEASE);
+            return slot;
+        } else {
+            return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
+        }
+    } else {
+        return wc_linuxkm_fpu_state_assoc_unlikely(create_p);
+    }
+}
+
+#endif /* !LINUXKM_FPU_STATES_FOLLOW_THREADS */
+
+#ifdef WOLFSSL_COMMERCIAL_LICENSE
+static struct fpstate *wc_linuxkm_fpstate_buf_from_fpu_state(
+    struct wc_thread_fpu_count_ent *state)
+{
+    size_t i = (size_t)(state - wc_linuxkm_fpu_states);
+    return &wc_linuxkm_fpu_savebufs[i].fpstate;
+}
+#endif
+
+static void wc_linuxkm_fpu_state_release_unlikely(
+    struct wc_thread_fpu_count_ent *ent)
+{
+    if (ent->fpu_state != 0) {
+        static int warned_nonzero_fpu_state = 0;
+        if (! 
warned_nonzero_fpu_state) { + pr_err("wc_linuxkm_fpu_state_free for pid %d" + " with nonzero fpu_state 0x%x.\n", ent->pid, ent->fpu_state); + warned_nonzero_fpu_state = 1; + } + ent->fpu_state = 0; + } + __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); +} + +static inline void wc_linuxkm_fpu_state_release( + struct wc_thread_fpu_count_ent *ent) +{ + if (unlikely(ent->fpu_state != 0)) + return wc_linuxkm_fpu_state_release_unlikely(ent); + __atomic_store_n(&ent->pid, 0, __ATOMIC_RELEASE); +} + +WARN_UNUSED_RESULT int can_save_vector_registers_x86(void) +{ + if (irq_fpu_usable()) + return 1; + else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) + return 0; + else if (test_thread_flag(TIF_NEED_FPU_LOAD)) + return 1; + return 0; +} + +WARN_UNUSED_RESULT int save_vector_registers_x86(void) +{ +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(1); +#else + struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0); +#endif + + /* allow for nested calls */ +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + if (pstate == NULL) + return MEMORY_E; +#endif + if ( +#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + (pstate != NULL) && +#endif + (pstate->fpu_state != 0U)) + { + if (unlikely((pstate->fpu_state & WC_FPU_COUNT_MASK) + == WC_FPU_COUNT_MASK)) + { + pr_err("save_vector_registers_x86 recursion register overflow for " + "pid %d.\n", pstate->pid); + return BAD_STATE_E; + } else { + ++pstate->fpu_state; + return 0; + } + } + + if (irq_fpu_usable() +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0)) + /* work around a kernel bug -- see linux commit 59f5ede3bc0f0. + * what we really want here is this_cpu_read(in_kernel_fpu), but + * in_kernel_fpu is an unexported static array. + */ + && !test_thread_flag(TIF_NEED_FPU_LOAD) +#endif + ) + { +#ifdef WOLFSSL_COMMERCIAL_LICENSE + struct fpstate *fpstate = wc_linuxkm_fpstate_buf_from_fpu_state(pstate); + fpregs_lock(); + fpstate->xfeatures = ~0UL; + os_xsave(fpstate); +#else /* !WOLFSSL_COMMERCIAL_LICENSE */ + #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) + /* inhibit migration, which gums up the algorithm in + * kernel_fpu_{begin,end}(). + */ + migrate_disable(); + #endif + kernel_fpu_begin(); + +#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + pstate = wc_linuxkm_fpu_state_assoc(1); + if (pstate == NULL) { + kernel_fpu_end(); + #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ + !defined(WOLFSSL_COMMERCIAL_LICENSE) + migrate_enable(); + #endif + return BAD_STATE_E; + } +#endif + +#endif /* !WOLFSSL_COMMERCIAL_LICENSE */ + /* set msb to 0 to trigger kernel_fpu_end() at cleanup. */ + pstate->fpu_state = 1U; + } else if (in_nmi() || (hardirq_count() > 0) || (softirq_count() > 0)) { + static int warned_fpu_forbidden = 0; + if (! warned_fpu_forbidden) + pr_err("save_vector_registers_x86 called from IRQ handler.\n"); +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + wc_linuxkm_fpu_state_release(pstate); +#endif + return BAD_STATE_E; + } else if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + static int warned_fpu_forbidden = 0; + if (! 
warned_fpu_forbidden) + pr_err("save_vector_registers_x86 called with !irq_fpu_usable from" + " thread without previous FPU save.\n"); +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + wc_linuxkm_fpu_state_release(pstate); +#endif + return BAD_STATE_E; + } else { + /* assume already safely in_kernel_fpu from caller, but recursively + * preempt_disable() to be extra-safe. + */ + preempt_disable(); +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ + !defined(WOLFSSL_COMMERCIAL_LICENSE) + migrate_disable(); +#endif +#ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + pstate = wc_linuxkm_fpu_state_assoc(1); + if (pstate == NULL) { + #if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ + !defined(WOLFSSL_COMMERCIAL_LICENSE) + migrate_enable(); + #endif + preempt_enable(); + return BAD_STATE_E; + } +#endif + /* set msb to 1 to inhibit kernel_fpu_end() at cleanup. */ + pstate->fpu_state = + WC_FPU_SAVED_MASK + 1U; + } + + return 0; +} + +void restore_vector_registers_x86(void) +{ + struct wc_thread_fpu_count_ent *pstate = wc_linuxkm_fpu_state_assoc(0); + if (unlikely(pstate == NULL)) { + pr_err("restore_vector_registers_x86 called by pid %d on CPU %d " + "with no saved state.\n", task_pid_nr(current), + raw_smp_processor_id()); + return; + } + + if ((--pstate->fpu_state & WC_FPU_COUNT_MASK) > 0U) { + return; + } + + if (pstate->fpu_state == 0U) { +#ifdef WOLFSSL_COMMERCIAL_LICENSE + struct fpstate *fpstate = wc_linuxkm_fpstate_buf_from_fpu_state(pstate); + os_xrstor(fpstate, fpstate->xfeatures); + fpregs_unlock(); +#else + #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + wc_linuxkm_fpu_state_release(pstate); + #endif + kernel_fpu_end(); +#endif + } else { + pstate->fpu_state = 0U; + #ifndef LINUXKM_FPU_STATES_FOLLOW_THREADS + wc_linuxkm_fpu_state_release(pstate); + #endif + preempt_enable(); + } +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_COUNT) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)) && \ + !defined(WOLFSSL_COMMERCIAL_LICENSE) + migrate_enable(); +#endif + +#ifdef LINUXKM_FPU_STATES_FOLLOW_THREADS + wc_linuxkm_fpu_state_release(pstate); +#endif + + return; +}
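
A closing note on the fpu_state encoding that the new file carries over: the low 31 bits (WC_FPU_COUNT_MASK) hold the recursion depth for nested save/restore pairs, and the MSB (WC_FPU_SAVED_MASK) records that the thread already held kernel FPU context on entry, which tells restore_vector_registers_x86() to preempt_enable() rather than kernel_fpu_end() when the depth returns to zero. A sketch of the decoding this implies; the helper names are illustrative only, not part of the patch:

    /* Illustrative decoding of pstate->fpu_state under the masks above. */
    static inline unsigned int wc_fpu_nesting_depth(unsigned int fpu_state)
    {
        return fpu_state & WC_FPU_COUNT_MASK;   /* 0 == no save outstanding */
    }
    static inline int wc_fpu_owns_kernel_fpu(unsigned int fpu_state)
    {
        /* MSB clear: this glue called kernel_fpu_begin() itself, so the
         * final restore must balance it with kernel_fpu_end(). */
        return (fpu_state & WC_FPU_SAVED_MASK) == 0;
    }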