提交 4a692ae3 编写于 作者: L Linus Torvalds

Merge tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Borislav Petkov:

 - Flush *all* mappings from the TLB after switching to the trampoline
   pagetable to prevent any stale entries' presence

 - Flush global mappings from the TLB, in addition to the CR3-write,
   after switching off of the trampoline_pgd during boot to clear the
   identity mappings

 - Prevent instrumentation issues resulting from the above changes

* tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Prevent early boot triple-faults with instrumentation
  x86/mm: Include spinlock_t definition in pgtable.
  x86/mm: Flush global TLB when switching to trampoline page-table
  x86/mm/64: Flush global TLB on boot and AP bringup
  x86/realmode: Add comment for Global bit usage in trampoline_pgd
  x86/mm: Add missing <asm/cpufeatures.h> dependency to <asm/page_64.h>
......@@ -5,6 +5,7 @@
#include <asm/page_64_types.h>
#ifndef __ASSEMBLY__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
/* duplicated to the one in bootmem.h */
......
......@@ -22,6 +22,7 @@
#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
#ifndef __ASSEMBLY__
#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
......
......@@ -89,6 +89,7 @@ static inline void set_real_mode_mem(phys_addr_t mem)
}
void reserve_real_mode(void);
void load_trampoline_pgtable(void);
#endif /* __ASSEMBLY__ */
......
......@@ -261,4 +261,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4)
{
native_write_cr4(cr4 ^ X86_CR4_PGE);
native_write_cr4(cr4);
}
#endif /* _ASM_X86_TLBFLUSH_H */
......@@ -384,7 +384,7 @@ void native_write_cr0(unsigned long val)
}
EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
void __no_profile native_write_cr4(unsigned long val)
{
unsigned long bits_changed = 0;
......
......@@ -487,6 +487,10 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
/*
* This needs to happen *before* kasan_early_init() because latter maps stuff
* into that page.
*/
clear_page(init_top_pgt);
/*
......@@ -498,6 +502,16 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
kasan_early_init();
/*
* Flush global TLB entries which could be left over from the trampoline page
* table.
*
* This needs to happen *after* kasan_early_init() as KASAN-enabled .configs
* instrument native_write_cr4() so KASAN must be initialized for that
* instrumentation to work.
*/
__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
idt_setup_early_handler();
copy_bootdata(__va(real_mode_data));
......
......@@ -166,9 +166,26 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
call sev_verify_cbit
popq %rsi
/* Switch to new page-table */
/*
* Switch to new page-table
*
* For the boot CPU this switches to early_top_pgt which still has the
* indentity mappings present. The secondary CPUs will switch to the
* init_top_pgt here, away from the trampoline_pgd and unmap the
* indentity mapped ranges.
*/
movq %rax, %cr3
/*
* Do a global TLB flush after the CR3 switch to make sure the TLB
* entries from the identity mapping are flushed.
*/
movq %cr4, %rcx
movq %rcx, %rax
xorq $X86_CR4_PGE, %rcx
movq %rcx, %cr4
movq %rax, %cr4
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
ANNOTATE_RETPOLINE_SAFE
......
......@@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type)
spin_unlock(&rtc_lock);
/*
* Switch back to the initial page table.
* Switch to the trampoline page table.
*/
#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
load_trampoline_pgtable();
/* Jump to the identity-mapped low memory code */
#ifdef CONFIG_X86_32
......
......@@ -714,6 +714,11 @@ static void __init memory_map_bottom_up(unsigned long map_start,
static void __init init_trampoline(void)
{
#ifdef CONFIG_X86_64
/*
* The code below will alias kernel page-tables in the user-range of the
* address space, including the Global bit. So global TLB entries will
* be created when using the trampoline page-table.
*/
if (!kaslr_memory_enabled())
trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
else
......
......@@ -1148,7 +1148,7 @@ void flush_tlb_one_user(unsigned long addr)
*/
STATIC_NOPV void native_flush_tlb_global(void)
{
unsigned long cr4, flags;
unsigned long flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
......@@ -1168,11 +1168,7 @@ STATIC_NOPV void native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
/* toggle PGE */
native_write_cr4(cr4 ^ X86_CR4_PGE);
/* write old PGE again and flush TLBs */
native_write_cr4(cr4);
__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
raw_local_irq_restore(flags);
}
......
......@@ -17,6 +17,32 @@ u32 *trampoline_cr4_features;
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
void load_trampoline_pgtable(void)
{
#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
#else
/*
* This function is called before exiting to real-mode and that will
* fail with CR4.PCIDE still set.
*/
if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
write_cr3(real_mode_header->trampoline_pgd);
#endif
/*
* The CR3 write above will not flush global TLB entries.
* Stale, global entries from previous page tables may still be
* present. Flush those stale entries.
*
* This ensures that memory accessed while running with
* trampoline_pgd is *actually* mapped into trampoline_pgd.
*/
__flush_tlb_all();
}
void __init reserve_real_mode(void)
{
phys_addr_t mem;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册