Skip to content

Commit

Permalink
x86/cpu_entry_area: Move it out of the fixmap
Browse files Browse the repository at this point in the history
Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big
and 0-day already hit a case where the fixmap PTEs were cleared by
cleanup_highmap().

Aside of that the fixmap API is a pain as it's all backwards.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
KAGA-KOKO authored and Ingo Molnar committed Dec 22, 2017
1 parent ed1bbc4 commit 92a0f81
Show file tree
Hide file tree
Showing 14 changed files with 143 additions and 88 deletions.
2 changes: 2 additions & 0 deletions Documentation/x86/x86_64/mm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
Expand All @@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
Expand Down
18 changes: 17 additions & 1 deletion arch/x86/include/asm/cpu_entry_area.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,26 @@ struct cpu_entry_area {
};

#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)

DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);

extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);

#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)

#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)

#define CPU_ENTRY_AREA_MAP_SIZE \
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)

extern struct cpu_entry_area *get_cpu_entry_area(int cpu);

static inline struct entry_stack *cpu_entry_stack(int cpu)
{
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}

#endif
1 change: 1 addition & 0 deletions arch/x86/include/asm/desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <asm/mmu.h>
#include <asm/fixmap.h>
#include <asm/irq_vectors.h>
#include <asm/cpu_entry_area.h>

#include <linux/smp.h>
#include <linux/percpu.h>
Expand Down
32 changes: 1 addition & 31 deletions arch/x86/include/asm/fixmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#else
#include <uapi/asm/vsyscall.h>
#endif
#include <asm/cpu_entry_area.h>

/*
* We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
Expand Down Expand Up @@ -84,7 +83,6 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
Expand All @@ -100,9 +98,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
/* Fixmap entries to remap the GDTs, one per processor. */
FIX_CPU_ENTRY_AREA_TOP,
FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,

#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
Expand Down Expand Up @@ -143,7 +138,7 @@ enum fixed_addresses {
extern void reserve_top_address(unsigned long reserve);

#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)

extern int fixmaps_set;

Expand Down Expand Up @@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
void __early_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);

static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
{
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
}

#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
})

#define get_cpu_entry_area_index(cpu, field) \
__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))

static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
}

static inline struct entry_stack *cpu_entry_stack(int cpu)
{
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}

#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
15 changes: 12 additions & 3 deletions arch/x86/include/asm/pgtable_32_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#define LAST_PKMAP 1024
#endif

#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
& PMD_MASK)
/*
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
* to avoid include recursion hell
*/
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)

#define CPU_ENTRY_AREA_BASE \
((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)

#define PKMAP_BASE \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
#else
# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
#endif

#define MODULES_VADDR VMALLOC_START
Expand Down
47 changes: 28 additions & 19 deletions arch/x86/include/asm/pgtable_64_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)

#ifdef CONFIG_X86_5LEVEL
#define VMALLOC_SIZE_TB _AC(16384, UL)
#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
# define VMALLOC_SIZE_TB _AC(16384, UL)
# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
#else
#define VMALLOC_SIZE_TB _AC(32, UL)
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
#endif

#ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START vmalloc_base
#define VMEMMAP_START vmemmap_base
# define VMALLOC_START vmalloc_base
# define VMEMMAP_START vmemmap_base
#else
#define VMALLOC_START __VMALLOC_BASE
#define VMEMMAP_START __VMEMMAP_BASE
# define VMALLOC_START __VMALLOC_BASE
# define VMEMMAP_START __VMEMMAP_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)

#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))

#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)

#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)

#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)

#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))

#define EARLY_DYNAMIC_PAGE_TABLES 64

Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/dumpstack.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

Expand Down
5 changes: 3 additions & 2 deletions arch/x86/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -951,8 +951,9 @@ void __init trap_init(void)
* "sidt" instruction will not leak the location of the kernel, and
* to defend the IDT against arbitrary memory write vulnerabilities.
* It will be reloaded in cpu_init() */
__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
idt_descr.address = fix_to_virt(FIX_RO_IDT);
cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
PAGE_KERNEL_RO);
idt_descr.address = CPU_ENTRY_AREA_RO_IDT;

/*
* Should be a barrier for any external CPU state:
Expand Down
66 changes: 50 additions & 16 deletions arch/x86/mm/cpu_entry_area.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
#endif

struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

return (struct cpu_entry_area *) va;
}
EXPORT_SYMBOL(get_cpu_entry_area);

void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
{
unsigned long va = (unsigned long) cea_vaddr;

set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
}

static void __init
set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
{
for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}

/* Setup the fixmap mappings only once per-processor */
Expand Down Expand Up @@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(int cpu)
pgprot_t tss_prot = PAGE_KERNEL;
#endif

__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);
cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
gdt_prot);

cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);

/*
* The Intel SDM says (Volume 3, 7.2.1):
Expand All @@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
&per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE,
tss_prot);
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
&per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);

#ifdef CONFIG_X86_32
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
Expand All @@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
BUILD_BUG_ON(sizeof(exception_stacks) !=
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
&per_cpu(exception_stacks, cpu),
sizeof(exception_stacks) / PAGE_SIZE,
PAGE_KERNEL);
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
&per_cpu(exception_stacks, cpu),
sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);

__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
#endif
}

static __init void setup_cpu_entry_area_ptes(void)
{
#ifdef CONFIG_X86_32
unsigned long start, end;

BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);

start = CPU_ENTRY_AREA_BASE;
end = start + CPU_ENTRY_AREA_MAP_SIZE;

for (; start < end; start += PMD_SIZE)
populate_extra_pte(start);
#endif
}

void __init setup_cpu_entry_areas(void)
{
unsigned int cpu;

setup_cpu_entry_area_ptes();

for_each_possible_cpu(cpu)
setup_cpu_entry_area(cpu);
}
6 changes: 5 additions & 1 deletion arch/x86/mm/dump_pagetables.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum address_markers_idx {
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
Expand All @@ -81,6 +82,7 @@ static struct addr_marker address_markers[] = {
[KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
[ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
#endif
Expand All @@ -104,6 +106,7 @@ enum address_markers_idx {
#ifdef CONFIG_HIGHMEM
PKMAP_BASE_NR,
#endif
CPU_ENTRY_AREA_NR,
FIXADDR_START_NR,
END_OF_SPACE_NR,
};
Expand All @@ -116,6 +119,7 @@ static struct addr_marker address_markers[] = {
#ifdef CONFIG_HIGHMEM
[PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
#endif
[CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
[FIXADDR_START_NR] = { 0UL, "Fixmap area" },
[END_OF_SPACE_NR] = { -1, NULL }
};
Expand Down Expand Up @@ -541,8 +545,8 @@ static int __init pt_dump_init(void)
address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
#endif

return 0;
}
__initcall(pt_dump_init);
6 changes: 6 additions & 0 deletions arch/x86/mm/init_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>

#include "mm_internal.h"
Expand Down Expand Up @@ -766,6 +767,7 @@ void __init mem_init(void)
mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
" cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
Expand All @@ -777,6 +779,10 @@ void __init mem_init(void)
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,

CPU_ENTRY_AREA_BASE,
CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
CPU_ENTRY_AREA_MAP_SIZE >> 10,

#ifdef CONFIG_HIGHMEM
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
(LAST_PKMAP*PAGE_SIZE) >> 10,
Expand Down
Loading

0 comments on commit 92a0f81

Please sign in to comment.