diff --git a/platform/pc/service.c b/platform/pc/service.c
index c24bbf9ed..6000db0bb 100644
--- a/platform/pc/service.c
+++ b/platform/pc/service.c
@@ -260,6 +260,9 @@ static void __attribute__((noinline)) init_service_new_stack()
     init_debug("init_hwrand");
     init_hwrand();
 
+    init_debug("init cpu features");
+    init_cpu_features();
+
     init_debug("calling kernel_runtime_init");
     kernel_runtime_init(kh);
     while(1);
diff --git a/src/x86_64/crt0.s b/src/x86_64/crt0.s
index 424ecd371..e9a99be61 100644
--- a/src/x86_64/crt0.s
+++ b/src/x86_64/crt0.s
@@ -45,19 +45,43 @@ extern init_service
     wrmsr
 %endmacro
 
+;; non-zero once init_cpu_features() has selected xsave/xrstor
+extern use_xsave
 
-;; XXX - stick with fx until we can choose according to capabilities -
-;; otherwise existing stuff breaks with ops, etc.
+;; Restore the extended register state stored in the frame at %1, with
+;; xrstor when use_xsave is set and fxrstor otherwise.  Clobbers al, and
+;; eax/edx on the xrstor path.
+;; NOTE(review): xrstor needs a 64-byte aligned area (fxrstor only 16) -
+;; confirm the frame layout guarantees that.
 %macro load_extended_registers 1
-;    mov edx, 0xffffffff
-;    mov eax, edx
+    mov al, [use_xsave]
+    test al, al
+    jnz %%xs
     fxrstor [%1+FRAME_EXTENDED_SAVE*8]
+    jmp %%out
+%%xs:
+    mov edx, 0xffffffff                ; edx:eax = all-ones component mask
+    mov eax, edx
+    xrstor [%1+FRAME_EXTENDED_SAVE*8]
+%%out:
 %endmacro
 
+;; Save the extended register state into the frame at %1, with xsave
+;; when use_xsave is set and fxsave otherwise.  Clobbers al, and eax/edx
+;; on the xsave path.
+;; NOTE(review): xsave needs a 64-byte aligned area (fxsave only 16) -
+;; confirm the frame layout guarantees that.
 %macro save_extended_registers 1
-;    mov edx, 0xffffffff
-;    mov eax, edx
+    mov al, [use_xsave]
+    test al, al
+    jnz %%xs
     fxsave [%1+FRAME_EXTENDED_SAVE*8]  ; we wouldn't have to do this if we could guarantee no other user thread ran before us
+    jmp %%out
+%%xs:
+    mov edx, 0xffffffff                ; edx:eax = all-ones component mask
+    mov eax, edx
+    xsave [%1+FRAME_EXTENDED_SAVE*8]
+%%out:
 %endmacro
 
 
diff --git a/src/x86_64/kernel_machine.c b/src/x86_64/kernel_machine.c
index 628dbfc1a..9fd400bf1 100644
--- a/src/x86_64/kernel_machine.c
+++ b/src/x86_64/kernel_machine.c
@@ -36,7 +36,7 @@ heap allocate_tagged_region(kernel_heaps kh, u64 tag)
 void clone_frame_pstate(context dest, context src)
 {
     runtime_memcpy(dest, src, sizeof(u64) * (FRAME_N_PSTATE + 1));
-    runtime_memcpy(dest + FRAME_EXTENDED_SAVE, src + FRAME_EXTENDED_SAVE, extended_frame_size());
+    runtime_memcpy(dest + FRAME_EXTENDED_SAVE, src + FRAME_EXTENDED_SAVE, extended_frame_size);
 }
 
 void init_cpuinfo_machine(cpuinfo ci, heap backed)
diff --git a/src/x86_64/kernel_machine.h b/src/x86_64/kernel_machine.h
index d33254164..d35bb0ec1 100644
--- a/src/x86_64/kernel_machine.h
+++ b/src/x86_64/kernel_machine.h
@@ -110,6 +110,7 @@ void start_cpu(int index);
 void allocate_apboot(heap stackheap, void (*ap_entry)());
 void deallocate_apboot(heap stackheap);
 void install_idt(void);
+void init_cpu_features(void);
 
 #define IST_EXCEPTION 1
 #define IST_INTERRUPT 2
@@ -165,6 +166,18 @@ static inline void cpuid(u32 fn, u32 ecx, u32 * v)
     asm volatile("cpuid" : "=a" (v[0]), "=b" (v[1]), "=c" (v[2]), "=d" (v[3]) : "0" (fn), "2" (ecx));
 }
 
+/* Write edx:eax to the extended control register selected by ecx. */
+static inline void xsetbv(u32 ecx, u32 eax, u32 edx)
+{
+    asm volatile("xsetbv" : : "a" (eax), "d" (edx), "c" (ecx));
+}
+
+/* Read the extended control register selected by ecx into *edx:*eax. */
+static inline void xgetbv(u32 ecx, u32 *eax, u32 *edx)
+{
+    asm volatile("xgetbv" : "=a" (*eax), "=d" (*edx) : "c" (ecx));
+}
+
 /* syscall entry */
 
 static inline void set_syscall_handler(void *syscall_entry)
@@ -239,20 +252,11 @@ static inline cpuinfo current_cpu(void)
     return (cpuinfo)pointer_from_u64(addr);
 }
 
-static inline u64 extended_frame_size(void)
-{
-#if 0
-    u32 v[4];
-    cpuid(0xd, 0, v);
-    return v[1];
-#else
-    return 512; /* XXX fx only right now */
-#endif
-}
-
+/* size in bytes of the extended register save area; defined in mp.c */
+extern u64 extended_frame_size;
 static inline u64 total_frame_size(void)
 {
-    return FRAME_EXTENDED_SAVE * sizeof(u64) + extended_frame_size();
+    return FRAME_EXTENDED_SAVE * sizeof(u64) + extended_frame_size;
 }
 
 static inline void frame_enable_interrupts(context f)
diff --git a/src/x86_64/mp.c b/src/x86_64/mp.c
index d51414818..e84f1a23c 100644
--- a/src/x86_64/mp.c
+++ b/src/x86_64/mp.c
@@ -22,16 +22,69 @@ static void (*start_callback)();
 #define mp_debug_u64(x)
 #endif
 
-void cpu_init(int cpu)
+/* CPUID leaf 1 ECX feature bits */
+#define CPUID_XSAVE (1<<26)
+#define CPUID_AVX (1<<28)
+
+/* XCR0 state-component enable bits */
+#define XCR0_X87 (1<<0)
+#define XCR0_SSE (1<<1)
+#define XCR0_AVX (1<<2)
+
+/* set when the extended-register macros in crt0.s should use xsave/xrstor */
+u8 use_xsave;
+/* bytes of extended state kept per frame; 512 is the fxsave area size */
+u64 extended_frame_size = 512;
+
+/*
+ * Set up CR0/CR4 for FPU and SSE use on the calling CPU and, when CPUID
+ * reports xsave, switch extended-state handling to xsave/xrstor: set
+ * CR4.OSXSAVE, enable state components in XCR0 and record the resulting
+ * save area size in extended_frame_size.
+ *
+ * NOTE(review): frames sized via total_frame_size() before this runs would
+ * only have room for the 512-byte default - confirm none are allocated
+ * earlier in boot.
+ */
+void init_cpu_features(void)
 {
     u64 cr;
+    u32 v[4];
+    u32 xcr0_lo, xcr0_hi;
+
+    cpuid(1, 0, v);
+    if (v[2] & CPUID_XSAVE)
+        use_xsave = 1;
     mov_from_cr("cr4", cr);
     cr |= CR4_PGE | CR4_OSFXSR | CR4_OSXMMEXCPT;
+    if (use_xsave)
+        cr |= CR4_OSXSAVE;
     mov_to_cr("cr4", cr);
     mov_from_cr("cr0", cr);
     cr |= C0_MP | C0_WP;
     cr &= ~C0_EM;
     mov_to_cr("cr0", cr);
+    if (use_xsave) {
+        /*
+         * xsave/xrstor only cover the components enabled in XCR0, which
+         * holds only x87 after processor reset.  SSE must be enabled even
+         * without AVX or the XMM registers would not be saved and restored.
+         */
+        xgetbv(0, &xcr0_lo, &xcr0_hi);
+        xcr0_lo |= XCR0_X87 | XCR0_SSE;
+        if (v[2] & CPUID_AVX)
+            xcr0_lo |= XCR0_AVX;
+        xsetbv(0, xcr0_lo, xcr0_hi);
+
+        /* EBX: save area size for the components now enabled in XCR0 */
+        cpuid(0xd, 0, v);
+        extended_frame_size = v[1];
+    }
+}
+
+void cpu_init(int cpu)
+{
+    init_cpu_features();
     u64 addr = u64_from_pointer(cpuinfo_from_id(cpu));
     write_msr(KERNEL_GS_MSR, 0); /* clear user GS */
     write_msr(GS_MSR, addr);