diff --git a/MAINTAINERS b/MAINTAINERS index b61848afb9d2618ab5eb1be26a3d3c41eff6c451..209306019483c88ece7c990b9d3fe85f77837535 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10135,7 +10135,7 @@ F: include/uapi/linux/ppdev.h F: Documentation/parport*.txt PARAVIRT_OPS INTERFACE -M: Jeremy Fitzhardinge +M: Juergen Gross M: Chris Wright M: Alok Kataria M: Rusty Russell @@ -10143,7 +10143,7 @@ L: virtualization@lists.linux-foundation.org S: Supported F: Documentation/virtual/paravirt_ops.txt F: arch/*/kernel/paravirt* -F: arch/*/include/asm/paravirt.h +F: arch/*/include/asm/paravirt*.h F: include/linux/hypervisor.h PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 1a2ba368da39ff4fbf24a922415490e1a2be68e6..9d0e13738ed3d3ea9cc9200c62153c101624a7b7 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr) #define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) #define store_gdt(dtr) native_store_gdt(dtr) -#define store_idt(dtr) native_store_idt(dtr) #define store_tr(tr) (tr = native_store_tr()) #define load_TLS(t, cpu) native_load_tls(t, cpu) @@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr) asm volatile("sgdt %0":"=m" (*dtr)); } -static inline void native_store_idt(struct desc_ptr *dtr) +static inline void store_idt(struct desc_ptr *dtr) { asm volatile("sidt %0":"=m" (*dtr)); } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c25dd22f7c70e97f293ecb086c1be572087ae0b3..12deec722cf0a14374d794b037eb26bf2e02ad51 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x) PVOP_VCALL1(pv_mmu_ops.write_cr3, x); } -static inline unsigned long __read_cr4(void) -{ - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); -} - static inline void __write_cr4(unsigned long x) { PVOP_VCALL1(pv_cpu_ops.write_cr4, x); @@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); } -static inline void store_idt(struct desc_ptr *dtr) -{ - PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); -} static inline unsigned long paravirt_store_tr(void) { return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); @@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn) PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn); } -static inline void pte_update(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); -} - static inline pte_t __pte(pteval_t val) { pteval_t ret; @@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); } -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - if (sizeof(pmdval_t) > sizeof(long)) - /* 5 arg words */ - pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd); - else - PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, - native_pmd_val(pmd)); -} - -static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) -{ - if (sizeof(pudval_t) > sizeof(long)) - /* 5 arg words */ - pv_mmu_ops.set_pud_at(mm, addr, pudp, pud); - else - PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp, - native_pud_val(pud)); -} - static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { pmdval_t val = native_pmd_val(pmd); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6b64fc6367f2520858cce6e0c31d111836f1e509..42873edd9f9d20cda2d1cfcb92b3798c4a75b3aa 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -107,7 +107,6 @@ struct pv_cpu_ops { unsigned long (*read_cr0)(void); void (*write_cr0)(unsigned long); - unsigned long (*read_cr4)(void); void (*write_cr4)(unsigned long); #ifdef CONFIG_X86_64 @@ -119,8 +118,6 @@ struct pv_cpu_ops { void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); void (*load_idt)(const struct desc_ptr *); - /* store_gdt has been removed. */ - void (*store_idt)(struct desc_ptr *); void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); @@ -245,12 +242,6 @@ struct pv_mmu_ops { void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmdval); - void (*set_pud_at)(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pudval); - void (*pte_update)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5b4c44d419c55cafbafbddc8562e5c4b8d074c29..b714934512b391905ed37400b6acb37ce6529ca2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags; #else /* !CONFIG_PARAVIRT */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) -#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) -#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags; #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) -#define pte_update(mm, addr, ptep) do { } while (0) - #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) @@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, native_set_pte(ptep, pte); } -static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp , pmd_t pmd) +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) { native_set_pmd(pmdp, pmd); } -static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) { native_set_pud(pudp, pud); } -#ifndef CONFIG_PARAVIRT -/* - * Rules for using pte_update - it must be called after any PTE update which - * has not been done using the set_pte / clear_pte interfaces. It is used by - * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE - * updates should either be sets, clears, or set_pte_atomic for P->P - * transitions, which means this hook should only be called for user PTEs. - * This hook implies a P->P protection or access change has taken place, which - * requires a subsequent TLB flush. - */ -#define pte_update(mm, addr, ptep) do { } while (0) -#endif - /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); - pte_update(mm, addr, ptep); return pte; } @@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); - pte_update(mm, addr, ptep); } #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 9efaabf5b54be04d722295dea1fd20a6e2515825..a24dfcf79f4acadd7b49f47c5138bc05cc50aa56 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -135,6 +135,11 @@ static inline void native_wbinvd(void) extern asmlinkage void native_load_gs_index(unsigned); +static inline unsigned long __read_cr4(void) +{ + return native_read_cr4(); +} + #ifdef CONFIG_PARAVIRT #include #else @@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x) native_write_cr3(x); } -static inline unsigned long __read_cr4(void) -{ - return native_read_cr4(); -} - static inline void __write_cr4(unsigned long x) { native_write_cr4(x); diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 7032f4d8dff30be9557eedb26f6dcb779b26d3eb..f65d12504e8051f2e47aae2a0076a883c1ce535e 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -152,12 +152,6 @@ /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) -/* - * HV_VP_SET available - */ -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) - - /* * Crash notification flag. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fb1d3358a4af5bde82e5d8dd751b2db3271a2c82..775f10100d7febac27a84bb9c8deab24e119a8c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s) __setup("nompx", x86_mpx_setup); #ifdef CONFIG_X86_64 -static int __init x86_pcid_setup(char *s) +static int __init x86_nopcid_setup(char *s) { - /* require an exact match without trailing characters */ - if (strlen(s)) - return 0; + /* nopcid doesn't accept parameters */ + if (s) + return -EINVAL; /* do not emit a message if the feature is not present */ if (!boot_cpu_has(X86_FEATURE_PCID)) - return 1; + return 0; setup_clear_cpu_cap(X86_FEATURE_PCID); pr_info("nopcid: PCID feature disabled\n"); - return 1; + return 0; } -__setup("nopcid", x86_pcid_setup); +early_param("nopcid", x86_nopcid_setup); #endif static int __init x86_noinvpcid_setup(char *s) @@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } -static void setup_pcid(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE)) { - /* - * We'd like to use cr4_set_bits_and_update_boot(), - * but we can't. CR4.PCIDE is special and can only - * be set in long mode, and the early CPU init code - * doesn't know this and would try to restore CR4.PCIDE - * prior to entering long mode. - * - * Instead, we rely on the fact that hotplug, resume, - * etc all fully restore CR4 before they write anything - * that could have nonzero PCID bits to CR3. CR4.PCIDE - * has no effect on the page tables themselves, so we - * don't need it to be restored early. - */ - cr4_set_bits(X86_CR4_PCIDE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't - * work if PCID is on but PGE is not. Since that - * combination doesn't exist on real hardware, there's - * no reason to try to fully support it, but it's - * polite to avoid corrupting data if we're on - * an improperly configured VM. - */ - clear_cpu_cap(c, X86_FEATURE_PCID); - } - } -} - /* * Protection Keys are not available in 32-bit mode. */ @@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_smep(c); setup_smap(c); - /* Set up PCID */ - setup_pcid(c); - /* * The vendor-specific functions might have changed features. * Now we do "generic changes." diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 3b3f713e15e5fea418ebc8eae3bc7a948570dd73..236324e83a3ae0755c4de2a087983759793c0add 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs) void hv_setup_vmbus_irq(void (*handler)(void)) { vmbus_handler = handler; - /* Setup the IDT for hypervisor callback */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); } void hv_remove_vmbus_irq(void) @@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void) */ x86_platform.apic_post_init = hyperv_init; hyperv_setup_mmu_ops(); + /* Setup the IDT for hypervisor callback */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); #endif } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index a14df9eecfed16598e131c41c3df4967a18f2871..19a3e8f961c772af6572f80a860e1c892ad31a32 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .set_debugreg = native_set_debugreg, .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, - .read_cr4 = native_read_cr4, .write_cr4 = native_write_cr4, #ifdef CONFIG_X86_64 .read_cr8 = native_read_cr8, @@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, .load_idt = native_load_idt, - .store_idt = native_store_idt, .store_tr = native_store_tr, .load_tls = native_load_tls, #ifdef CONFIG_X86_64 @@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, .set_pmd = native_set_pmd, - .set_pmd_at = native_set_pmd_at, - .pte_update = paravirt_nop, .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, @@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .set_pud_at = native_set_pud_at, .pmd_val = PTE_IDENT, .make_pmd = PTE_IDENT, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d84afb0a322dd8515fb91424eef6cc42fcbfaee2..0957dd73d127554803f35d5be45dcddb845ca741 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p) * with the current CR4 value. This may not be necessary, but * auditing all the early-boot CR4 manipulation would be needed to * rule it out. + * + * Mask off features that don't work outside long mode (just + * PCIDE for now). */ - mmu_cr4_features = __read_cr4(); + mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE; memblock_set_current_limit(get_max_mapped()); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index cd6622c3204e6b4c5c159a659b0b23f4728da34a..0854ff1692745adf4831e2deea0cb203977847d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -226,10 +226,12 @@ static int enable_start_cpu0; static void notrace start_secondary(void *unused) { /* - * Don't put *anything* before cpu_init(), SMP booting is too - * fragile that we want to limit the things done here to the - * most necessary things. + * Don't put *anything* except direct CPU state initialization + * before cpu_init(), SMP booting is too fragile that we want to + * limit the things done here to the most necessary things. */ + if (boot_cpu_has(X86_FEATURE_PCID)) + __write_cr4(__read_cr4() | X86_CR4_PCIDE); cpu_init(); x86_cpuinit.early_percpu_clock_init(); preempt_disable(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4253adef9044c10429094495e01074da46b0d864..699704d4bc9e7716e04da7298ec7f4491b436af1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - native_store_idt(&dt); + store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ vmx->host_idt_base = dt.address; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7777ccc0e9f979dc76cc9d520885eea02114223e..af5c1ed21d43ac651ecbe02e7a58dc7baa884168 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void) } } +static void setup_pcid(void) +{ +#ifdef CONFIG_X86_64 + if (boot_cpu_has(X86_FEATURE_PCID)) { + if (boot_cpu_has(X86_FEATURE_PGE)) { + /* + * This can't be cr4_set_bits_and_update_boot() -- + * the trampoline code can't handle CR4.PCIDE and + * it wouldn't do any good anyway. Despite the name, + * cr4_set_bits_and_update_boot() doesn't actually + * cause the bits in question to remain set all the + * way through the secondary boot asm. + * + * Instead, we brute-force it and set CR4.PCIDE + * manually in start_secondary(). + */ + cr4_set_bits(X86_CR4_PCIDE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't + * work if PCID is on but PGE is not. Since that + * combination doesn't exist on real hardware, there's + * no reason to try to fully support it, but it's + * polite to avoid corrupting data if we're on + * an improperly configured VM. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); + } + } +#endif +} + #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -592,6 +625,7 @@ void __init init_mem_mapping(void) unsigned long end; probe_page_size_mask(); + setup_pcid(); #ifdef CONFIG_X86_64 end = max_pfn << PAGE_SHIFT; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 218834a3e9adde25ea3ddf3ef2b3f513846ce1c7..b372f3442bbf3b5cff5782001e5c79b46d49edc6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, { int changed = !pte_same(*ptep, entry); - if (changed && dirty) { + if (changed && dirty) *ptep = entry; - pte_update(vma->vm_mm, address, ptep); - } return changed; } @@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); - if (ret) - pte_update(vma->vm_mm, addr, ptep); - return ret; } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 37689a7cc03b493927f50b2d14be5f72b3981a02..1ab3821f9e2629df571544077d63be950361bc20 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * hypothetical buggy code that directly switches to swapper_pg_dir * without going through leave_mm() / switch_mm_irqs_off() or that * does something like write_cr3(read_cr3_pa()). + * + * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3() + * isn't free. */ - VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid)); +#ifdef CONFIG_DEBUG_VM + if (WARN_ON_ONCE(__read_cr3() != + (__sme_pa(real_prev->pgd) | prev_asid))) { + /* + * If we were to BUG here, we'd be very likely to kill + * the system so hard that we don't see the call trace. + * Try to recover instead by ignoring the error and doing + * a global flush to minimize the chance of corruption. + * + * (This is far from being a fully correct recovery. + * Architecturally, the CPU could prefetch something + * back into an incorrect ASID slot and leave it there + * to cause trouble down the road. It's better than + * nothing, though.) + */ + __flush_tlb_all(); + } +#endif if (real_prev == next) { VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f2598d81cd55067ffd695a18bde0352a5d98264b..f910c514438f168a5f09afbd312311498abe2c50 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) return -EOVERFLOW; rdr->jump_address = (unsigned long)restore_registers; rdr->jump_address_phys = __pa_symbol(restore_registers); - rdr->cr3 = restore_cr3; + + /* + * The restore code fixes up CR3 and CR4 in the following sequence: + * + * [in hibernation asm] + * 1. CR3 <= temporary page tables + * 2. CR4 <= mmu_cr4_features (from the kernel that restores us) + * 3. CR3 <= rdr->cr3 + * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel) + * [in restore_processor_state()] + * 5. CR4 <= saved CR4 + * 6. CR3 <= saved CR3 + * + * Our mmu_cr4_features has CR4.PCIDE=0, and toggling + * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so + * rdr->cr3 needs to point to valid page tables but must not + * have any of the PCID bits set. + */ + rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK; + rdr->magic = RESTORE_MAGIC; hibernation_e820_save(rdr->e820_digest); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae2a2e2d636286f36e570dde5b4cb9d29dc55f5b..69b9deff7e5c84c4133064ada0606eb872908152 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr0 = xen_read_cr0, .write_cr0 = xen_write_cr0, - .read_cr4 = native_read_cr4, .write_cr4 = xen_write_cr4, #ifdef CONFIG_X86_64 @@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .alloc_ldt = xen_alloc_ldt, .free_ldt = xen_free_ldt, - .store_idt = native_store_idt, .store_tr = xen_store_tr, .write_ldt_entry = xen_write_ldt_entry, diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 6b983b300666bd178c5b898f35e6c8c134d2a800..509f560bd0c6d4731cac96fc64296184e6818b9c 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .flush_tlb_single = xen_flush_tlb_single, .flush_tlb_others = xen_flush_tlb_others, - .pte_update = paravirt_nop, - .pgd_alloc = xen_pgd_alloc, .pgd_free = xen_pgd_free,