Commit f682a792 authored by Linus Torvalds

Merge branch 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 paravirt updates from Ingo Molnar:
 "Two main changes:

   - Remove no longer used parts of the paravirt infrastructure and put
     large quantities of paravirt ops under a new config option
     PARAVIRT_XXL=y, which is selected by XEN_PV only. (Juergen Gross)

   - Enable PV spinlocks on Hyper-V (Yi Sun)"

* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyperv: Enable PV qspinlock for Hyper-V
  x86/hyperv: Add GUEST_IDLE_MSR support
  x86/paravirt: Clean up native_patch()
  x86/paravirt: Prevent redefinition of SAVE_FLAGS macro
  x86/xen: Make xen_reservation_lock static
  x86/paravirt: Remove unneeded mmu related paravirt ops bits
  x86/paravirt: Move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
  x86/paravirt: Move the pv_irq_ops under the PARAVIRT_XXL umbrella
  x86/paravirt: Move the Xen-only pv_cpu_ops under the PARAVIRT_XXL umbrella
  x86/paravirt: Move items in pv_info under PARAVIRT_XXL umbrella
  x86/paravirt: Introduce new config option PARAVIRT_XXL
  x86/paravirt: Remove unused paravirt bits
  x86/paravirt: Use a single ops structure
  x86/paravirt: Remove clobbers from struct paravirt_patch_site
  x86/paravirt: Remove clobbers parameter from paravirt patch functions
  x86/paravirt: Make paravirt_patch_call() and paravirt_patch_jmp() static
  x86/xen: Add SPDX identifier in arch/x86/xen files
  x86/xen: Link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
  x86/xen: Move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
  x86/xen: Move pv irq related functions under CONFIG_XEN_PV umbrella
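
For illustration only (not part of this merge): after "x86/paravirt: Use a single ops structure", a guest platform installs its hooks by assigning into the one pv_ops structure instead of the old per-group globals (pv_time_ops, pv_cpu_ops, ...). A minimal sketch follows; the "example_*" names are invented, while the pv_ops fields match the call sites changed in this series (kvm, vmware, hyperv).

    /*
     * Hypothetical guest setup, sketching the new calling convention.
     * Only pv_ops and its fields are real; everything "example_*" is invented.
     */
    #include <asm/paravirt.h>	/* struct paravirt_patch_template pv_ops */

    static u64 example_steal_clock(int cpu)
    {
    	return 0;		/* this sketch reports no steal time */
    }

    static void __init example_guest_init(void)
    {
    	/* was: pv_time_ops.steal_clock = ...; pv_cpu_ops.io_delay = ...; */
    	pv_ops.time.steal_clock = example_steal_clock;
    	pv_ops.cpu.io_delay     = paravirt_nop;
    }
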
...@@ -1390,6 +1390,11 @@ ...@@ -1390,6 +1390,11 @@
hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs. hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
If specified, z/VM IUCV HVC accepts connections If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only. from listed z/VM user IDs only.
hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations
which allow the hypervisor to 'idle' the
guest on lock contention.
keep_bootcon [KNL] keep_bootcon [KNL]
Do not unregister boot console at start. This is only Do not unregister boot console at start. This is only
useful for debugging when something happens in the window useful for debugging when something happens in the window
......
...@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; ...@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops { struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu); unsigned long long (*steal_clock)(int cpu);
}; };
extern struct pv_time_ops pv_time_ops;
struct paravirt_patch_template {
struct pv_time_ops time;
};
extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu) static inline u64 paravirt_steal_clock(int cpu)
{ {
return pv_time_ops.steal_clock(cpu); return pv_ops.time.steal_clock(cpu);
} }
#endif #endif
......
...@@ -21,5 +21,5 @@ ...@@ -21,5 +21,5 @@
struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled; struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops pv_time_ops; struct paravirt_patch_template pv_ops;
EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL_GPL(pv_ops);
...@@ -62,29 +62,6 @@ static __read_mostly unsigned int xen_events_irq; ...@@ -62,29 +62,6 @@ static __read_mostly unsigned int xen_events_irq;
uint32_t xen_start_flags; uint32_t xen_start_flags;
EXPORT_SYMBOL(xen_start_flags); EXPORT_SYMBOL(xen_start_flags);
int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
{
return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
/* Not used by XENFEAT_auto_translated guests. */
int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages) int nr, struct page **pages)
{ {
...@@ -92,17 +69,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, ...@@ -92,17 +69,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
} }
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
/* Not used by XENFEAT_auto_translated guests. */
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid, struct page **pages)
{
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
static void xen_read_wallclock(struct timespec64 *ts) static void xen_read_wallclock(struct timespec64 *ts)
{ {
u32 version; u32 version;
......
...@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; ...@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops { struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu); unsigned long long (*steal_clock)(int cpu);
}; };
extern struct pv_time_ops pv_time_ops;
struct paravirt_patch_template {
struct pv_time_ops time;
};
extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu) static inline u64 paravirt_steal_clock(int cpu)
{ {
return pv_time_ops.steal_clock(cpu); return pv_ops.time.steal_clock(cpu);
} }
#endif #endif
......
...@@ -21,5 +21,5 @@ ...@@ -21,5 +21,5 @@
struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled; struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops pv_time_ops; struct paravirt_patch_template pv_ops;
EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL_GPL(pv_ops);
...@@ -523,6 +523,7 @@ config X86_VSMP ...@@ -523,6 +523,7 @@ config X86_VSMP
bool "ScaleMP vSMP" bool "ScaleMP vSMP"
select HYPERVISOR_GUEST select HYPERVISOR_GUEST
select PARAVIRT select PARAVIRT
select PARAVIRT_XXL
depends on X86_64 && PCI depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM depends on X86_EXTENDED_PLATFORM
depends on SMP depends on SMP
...@@ -753,6 +754,9 @@ config PARAVIRT ...@@ -753,6 +754,9 @@ config PARAVIRT
over full virtualization. However, when run without a hypervisor over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger. the kernel is theoretically slower and slightly larger.
config PARAVIRT_XXL
bool
config PARAVIRT_DEBUG config PARAVIRT_DEBUG
bool "paravirt-ops debugging" bool "paravirt-ops debugging"
depends on PARAVIRT && DEBUG_KERNEL depends on PARAVIRT && DEBUG_KERNEL
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
* paravirt and debugging variants are added.) * paravirt and debugging variants are added.)
*/ */
#undef CONFIG_PARAVIRT #undef CONFIG_PARAVIRT
#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN #undef CONFIG_KASAN
......
...@@ -783,7 +783,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region) ...@@ -783,7 +783,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
* will ignore all of the single-step traps generated in this range. * will ignore all of the single-step traps generated in this range.
*/ */
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
/* /*
* Xen doesn't set %esp to be precisely what the normal SYSENTER * Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path. * entry point expects, so fix it up before using the normal path.
...@@ -1241,7 +1241,7 @@ ENTRY(spurious_interrupt_bug) ...@@ -1241,7 +1241,7 @@ ENTRY(spurious_interrupt_bug)
jmp common_exception jmp common_exception
END(spurious_interrupt_bug) END(spurious_interrupt_bug)
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback) ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */ pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL SAVE_ALL
...@@ -1322,11 +1322,13 @@ ENTRY(xen_failsafe_callback) ...@@ -1322,11 +1322,13 @@ ENTRY(xen_failsafe_callback)
_ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(3b, 8b)
_ASM_EXTABLE(4b, 9b) _ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback) ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
#ifdef CONFIG_XEN_PVHVM
BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
xen_evtchn_do_upcall) xen_evtchn_do_upcall)
#endif
#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV) #if IS_ENABLED(CONFIG_HYPERV)
......
...@@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack) ...@@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack)
ret ret
ENDPROC(do_softirq_own_stack) ENDPROC(do_softirq_own_stack)
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
/* /*
...@@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback) ...@@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback)
ENCODE_FRAME_POINTER ENCODE_FRAME_POINTER
jmp error_exit jmp error_exit
END(xen_failsafe_callback) END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
#ifdef CONFIG_XEN_PVHVM
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall xen_hvm_callback_vector xen_evtchn_do_upcall
#endif
#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV) #if IS_ENABLED(CONFIG_HYPERV)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
...@@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK ...@@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1 idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
idtentry xennmi do_nmi has_error_code=0 idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0 idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0 idtentry xenint3 do_int3 has_error_code=0
......
obj-y := hv_init.o mmu.o nested.o obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o obj-$(CONFIG_X86_64) += hv_apic.o
ifdef CONFIG_X86_64
obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o
endif
// SPDX-License-Identifier: GPL-2.0
/*
* Hyper-V specific spinlock code.
*
* Copyright (C) 2018, Intel, Inc.
*
* Author : Yi Sun <yi.y.sun@intel.com>
*/
#define pr_fmt(fmt) "Hyper-V: " fmt
#include <linux/spinlock.h>
#include <asm/mshyperv.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
static bool __initdata hv_pvspin = true;
static void hv_qlock_kick(int cpu)
{
apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR);
}
static void hv_qlock_wait(u8 *byte, u8 val)
{
unsigned long msr_val;
unsigned long flags;
if (in_nmi())
return;
/*
* Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the
* vCPU can be put into 'idle' state. This 'idle' state is
* terminated by an IPI, usually from hv_qlock_kick(), even if
* interrupts are disabled on the vCPU.
*
* To prevent a race against the unlock path it is required to
* disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE
* MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between
* the lock value check and the rdmsrl() then the vCPU might be put
* into 'idle' state by the hypervisor and kept in that state for
* an unspecified amount of time.
*/
local_irq_save(flags);
/*
* Only issue the rdmsrl() when the lock state has not changed.
*/
if (READ_ONCE(*byte) == val)
rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val);
local_irq_restore(flags);
}
/*
* Hyper-V does not support this so far.
*/
bool hv_vcpu_is_preempted(int vcpu)
{
return false;
}
PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted);
void __init hv_init_spinlocks(void)
{
if (!hv_pvspin || !apic ||
!(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) ||
!(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) {
pr_info("PV spinlocks disabled\n");
return;
}
pr_info("PV spinlocks enabled\n");
__pv_init_lock_hash();
pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_ops.lock.wait = hv_qlock_wait;
pv_ops.lock.kick = hv_qlock_kick;
pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted);
}
static __init int hv_parse_nopvspin(char *arg)
{
hv_pvspin = false;
return 0;
}
early_param("hv_nopvspin", hv_parse_nopvspin);
...@@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void) ...@@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void)
return; return;
pr_info("Using hypercall for remote TLB flush\n"); pr_info("Using hypercall for remote TLB flush\n");
pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
pv_mmu_ops.tlb_remove_table = tlb_remove_table; pv_ops.mmu.tlb_remove_table = tlb_remove_table;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
DECLARE_PER_CPU(unsigned long, cpu_dr7); DECLARE_PER_CPU(unsigned long, cpu_dr7);
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT_XXL
/* /*
* These special macros can be used to get or set a debugging register * These special macros can be used to get or set a debugging register
*/ */
......
...@@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr) ...@@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr)
return !(desc[0] | desc[1]); return !(desc[0] | desc[1]);
} }
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#define load_TR_desc() native_load_tr_desc() #define load_TR_desc() native_load_tr_desc()
...@@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) ...@@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{ {
} }
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT_XXL */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
......
...@@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); ...@@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
void native_set_fixmap(enum fixed_addresses idx, void native_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags); phys_addr_t phys, pgprot_t flags);
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT_XXL
static inline void __set_fixmap(enum fixed_addresses idx, static inline void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags) phys_addr_t phys, pgprot_t flags)
{ {
......
...@@ -38,6 +38,8 @@ ...@@ -38,6 +38,8 @@
#define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) #define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
/* Partition reference TSC MSR is available */ /* Partition reference TSC MSR is available */
#define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) #define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9)
/* Partition Guest IDLE MSR is available */
#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10)
/* A partition's reference time stamp counter (TSC) page */ /* A partition's reference time stamp counter (TSC) page */
#define HV_X64_MSR_REFERENCE_TSC 0x40000021 #define HV_X64_MSR_REFERENCE_TSC 0x40000021
...@@ -246,6 +248,9 @@ ...@@ -246,6 +248,9 @@
#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7
/* Hyper-V guest idle MSR */
#define HV_X64_MSR_GUEST_IDLE 0x400000F0
/* Hyper-V guest crash notification MSR's */ /* Hyper-V guest crash notification MSR's */
#define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P0 0x40000100
#define HV_X64_MSR_CRASH_P1 0x40000101 #define HV_X64_MSR_CRASH_P1 0x40000101
......
...@@ -64,7 +64,7 @@ static inline __cpuidle void native_halt(void) ...@@ -64,7 +64,7 @@ static inline __cpuidle void native_halt(void)
#endif #endif
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
...@@ -123,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void) ...@@ -123,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define DISABLE_INTERRUPTS(x) cli #define DISABLE_INTERRUPTS(x) cli
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(x) pushfq; popq %rax
#endif
#define SWAPGS swapgs #define SWAPGS swapgs
/* /*
* Currently paravirt can't handle swapgs nicely when we * Currently paravirt can't handle swapgs nicely when we
...@@ -135,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void) ...@@ -135,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
*/ */
#define SWAPGS_UNSAFE_STACK swapgs #define SWAPGS_UNSAFE_STACK swapgs
#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
#define INTERRUPT_RETURN jmp native_iret #define INTERRUPT_RETURN jmp native_iret
#define USERGS_SYSRET64 \ #define USERGS_SYSRET64 \
swapgs; \ swapgs; \
...@@ -145,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void) ...@@ -145,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void)
swapgs; \ swapgs; \
sysretl sysretl
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(x) pushfq; popq %rax
#endif
#else #else
#define INTERRUPT_RETURN iret #define INTERRUPT_RETURN iret
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
#define GET_CR0_INTO_EAX movl %cr0, %eax
#endif #endif
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT_XXL */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
static inline int arch_irqs_disabled_flags(unsigned long flags) static inline int arch_irqs_disabled_flags(unsigned long flags)
......
...@@ -16,12 +16,12 @@ ...@@ -16,12 +16,12 @@
extern atomic64_t last_mm_ctx_id; extern atomic64_t last_mm_ctx_id;
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_activate_mm(struct mm_struct *prev, static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next) struct mm_struct *next)
{ {
} }
#endif /* !CONFIG_PARAVIRT */ #endif /* !CONFIG_PARAVIRT_XXL */
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
......
...@@ -351,6 +351,8 @@ int hyperv_flush_guest_mapping(u64 as); ...@@ -351,6 +351,8 @@ int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
void hv_apic_init(void); void hv_apic_init(void);
void __init hv_init_spinlocks(void);
bool hv_vcpu_is_preempted(int vcpu);
#else #else
static inline void hv_apic_init(void) {} static inline void hv_apic_init(void) {}
#endif #endif
......
...@@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter) ...@@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter)
return EAX_EDX_VAL(val, low, high); return EAX_EDX_VAL(val, low, high);
} }
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#include <linux/errno.h> #include <linux/errno.h>
...@@ -305,7 +305,7 @@ do { \ ...@@ -305,7 +305,7 @@ do { \
#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) #define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
#endif /* !CONFIG_PARAVIRT */ #endif /* !CONFIG_PARAVIRT_XXL */
/* /*
* 64-bit version of wrmsr_safe(): * 64-bit version of wrmsr_safe():
......
(This diff has been collapsed.)
...@@ -66,11 +66,13 @@ struct paravirt_callee_save { ...@@ -66,11 +66,13 @@ struct paravirt_callee_save {
/* general info */ /* general info */
struct pv_info { struct pv_info {
#ifdef CONFIG_PARAVIRT_XXL
unsigned int kernel_rpl; unsigned int kernel_rpl;
int shared_kernel_pmd; int shared_kernel_pmd;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
u16 extra_user_64bit_cs; /* __USER_CS if none */ u16 extra_user_64bit_cs; /* __USER_CS if none */
#endif
#endif #endif
const char *name; const char *name;
...@@ -85,17 +87,18 @@ struct pv_init_ops { ...@@ -85,17 +87,18 @@ struct pv_init_ops {
* the number of bytes of code generated, as we nop pad the * the number of bytes of code generated, as we nop pad the
* rest in generic code. * rest in generic code.
*/ */
unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned (*patch)(u8 type, void *insnbuf,
unsigned long addr, unsigned len); unsigned long addr, unsigned len);
} __no_randomize_layout; } __no_randomize_layout;
#ifdef CONFIG_PARAVIRT_XXL
struct pv_lazy_ops { struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */ /* Set deferred update mode, used for batching operations. */
void (*enter)(void); void (*enter)(void);
void (*leave)(void); void (*leave)(void);
void (*flush)(void); void (*flush)(void);
} __no_randomize_layout; } __no_randomize_layout;
#endif
struct pv_time_ops { struct pv_time_ops {
unsigned long long (*sched_clock)(void); unsigned long long (*sched_clock)(void);
...@@ -104,6 +107,9 @@ struct pv_time_ops { ...@@ -104,6 +107,9 @@ struct pv_time_ops {
struct pv_cpu_ops { struct pv_cpu_ops {
/* hooks for various privileged instructions */ /* hooks for various privileged instructions */
void (*io_delay)(void);
#ifdef CONFIG_PARAVIRT_XXL
unsigned long (*get_debugreg)(int regno); unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value); void (*set_debugreg)(int regno, unsigned long value);
...@@ -141,7 +147,6 @@ struct pv_cpu_ops { ...@@ -141,7 +147,6 @@ struct pv_cpu_ops {
void (*set_iopl_mask)(unsigned mask); void (*set_iopl_mask)(unsigned mask);
void (*wbinvd)(void); void (*wbinvd)(void);
void (*io_delay)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */ /* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx, void (*cpuid)(unsigned int *eax, unsigned int *ebx,
...@@ -176,9 +181,11 @@ struct pv_cpu_ops { ...@@ -176,9 +181,11 @@ struct pv_cpu_ops {
void (*start_context_switch)(struct task_struct *prev); void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next); void (*end_context_switch)(struct task_struct *next);
#endif
} __no_randomize_layout; } __no_randomize_layout;
struct pv_irq_ops { struct pv_irq_ops {
#ifdef CONFIG_PARAVIRT_XXL
/* /*
* Get/set interrupt state. save_fl and restore_fl are only * Get/set interrupt state. save_fl and restore_fl are only
* expected to use X86_EFLAGS_IF; all other bits * expected to use X86_EFLAGS_IF; all other bits
...@@ -195,35 +202,34 @@ struct pv_irq_ops { ...@@ -195,35 +202,34 @@ struct pv_irq_ops {
void (*safe_halt)(void); void (*safe_halt)(void);
void (*halt)(void); void (*halt)(void);
#endif
} __no_randomize_layout; } __no_randomize_layout;
struct pv_mmu_ops { struct pv_mmu_ops {
/* TLB operations */
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
#ifdef CONFIG_PARAVIRT_XXL
unsigned long (*read_cr2)(void); unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long); void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void); unsigned long (*read_cr3)(void);
void (*write_cr3)(unsigned long); void (*write_cr3)(unsigned long);
/* /* Hooks for intercepting the creation/use of an mm_struct. */
* Hooks for intercepting the creation/use/destruction of an
* mm_struct.
*/
void (*activate_mm)(struct mm_struct *prev, void (*activate_mm)(struct mm_struct *prev,
struct mm_struct *next); struct mm_struct *next);
void (*dup_mmap)(struct mm_struct *oldmm, void (*dup_mmap)(struct mm_struct *oldmm,
struct mm_struct *mm); struct mm_struct *mm);
void (*exit_mmap)(struct mm_struct *mm);
/* TLB operations */
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_one_user)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
/* Hooks for allocating and freeing a pagetable top-level */ /* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm); int (*pgd_alloc)(struct mm_struct *mm);
...@@ -298,6 +304,7 @@ struct pv_mmu_ops { ...@@ -298,6 +304,7 @@ struct pv_mmu_ops {
an mfn. We can tell which is which from the index. */ an mfn. We can tell which is which from the index. */
void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags); phys_addr_t phys, pgprot_t flags);
#endif
} __no_randomize_layout; } __no_randomize_layout;
struct arch_spinlock; struct arch_spinlock;
...@@ -321,28 +328,23 @@ struct pv_lock_ops { ...@@ -321,28 +328,23 @@ struct pv_lock_ops {
* number for each function using the offset which we use to indicate * number for each function using the offset which we use to indicate
* what to patch. */ * what to patch. */
struct paravirt_patch_template { struct paravirt_patch_template {
struct pv_init_ops pv_init_ops; struct pv_init_ops init;
struct pv_time_ops pv_time_ops; struct pv_time_ops time;
struct pv_cpu_ops pv_cpu_ops; struct pv_cpu_ops cpu;
struct pv_irq_ops pv_irq_ops; struct pv_irq_ops irq;
struct pv_mmu_ops pv_mmu_ops; struct pv_mmu_ops mmu;
struct pv_lock_ops pv_lock_ops; struct pv_lock_ops lock;
} __no_randomize_layout; } __no_randomize_layout;
extern struct pv_info pv_info; extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops; extern struct paravirt_patch_template pv_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_mmu_ops pv_mmu_ops;
extern struct pv_lock_ops pv_lock_ops;
#define PARAVIRT_PATCH(x) \ #define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *)) (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
#define paravirt_type(op) \ #define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
[paravirt_opptr] "i" (&(op)) [paravirt_opptr] "i" (&(pv_ops.op))
#define paravirt_clobber(clobber) \ #define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber) [paravirt_clobber] "i" (clobber)
...@@ -361,20 +363,13 @@ extern struct pv_lock_ops pv_lock_ops; ...@@ -361,20 +363,13 @@ extern struct pv_lock_ops pv_lock_ops;
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
unsigned paravirt_patch_call(void *insnbuf, unsigned paravirt_patch_default(u8 type, void *insnbuf,
const void *target, u16 tgt_clobbers,
unsigned long addr, u16 site_clobbers,
unsigned len);
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len); unsigned long addr, unsigned len);
unsigned paravirt_patch_insns(void *insnbuf, unsigned len, unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
const char *start, const char *end); const char *start, const char *end);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
unsigned long addr, unsigned len);
int paravirt_disable_iospace(void); int paravirt_disable_iospace(void);
...@@ -488,9 +483,9 @@ int paravirt_disable_iospace(void); ...@@ -488,9 +483,9 @@ int paravirt_disable_iospace(void);
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
#ifdef CONFIG_PARAVIRT_DEBUG #ifdef CONFIG_PARAVIRT_DEBUG
#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) #define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL)
#else #else
#define PVOP_TEST_NULL(op) ((void)op) #define PVOP_TEST_NULL(op) ((void)pv_ops.op)
#endif #endif
#define PVOP_RETMASK(rettype) \ #define PVOP_RETMASK(rettype) \
...@@ -666,7 +661,6 @@ struct paravirt_patch_site { ...@@ -666,7 +661,6 @@ struct paravirt_patch_site {
u8 *instr; /* original instructions */ u8 *instr; /* original instructions */
u8 instrtype; /* type of this instruction */ u8 instrtype; /* type of this instruction */
u8 len; /* length of original instruction */ u8 len; /* length of original instruction */
u16 clobbers; /* what registers you may clobber */
}; };
extern struct paravirt_patch_site __parainstructions[], extern struct paravirt_patch_site __parainstructions[],
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) #define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
......
...@@ -20,7 +20,7 @@ typedef union { ...@@ -20,7 +20,7 @@ typedef union {
} pte_t; } pte_t;
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \ #define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \
(pv_info.shared_kernel_pmd))) (pv_info.shared_kernel_pmd)))
#else #else
......
...@@ -55,9 +55,9 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); ...@@ -55,9 +55,9 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
extern pmdval_t early_pmd_flags; extern pmdval_t early_pmd_flags;
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT */ #else /* !CONFIG_PARAVIRT_XXL */
#define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
...@@ -112,8 +112,7 @@ extern pmdval_t early_pmd_flags; ...@@ -112,8 +112,7 @@ extern pmdval_t early_pmd_flags;
#define __pte(x) native_make_pte(x) #define __pte(x) native_make_pte(x)
#define arch_end_context_switch(prev) do {} while(0) #define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_PARAVIRT */
/* /*
* The following only work if pte_present() is true. * The following only work if pte_present() is true.
......
...@@ -579,7 +579,7 @@ static inline bool on_thread_stack(void) ...@@ -579,7 +579,7 @@ static inline bool on_thread_stack(void)
current_stack_pointer) < THREAD_SIZE; current_stack_pointer) < THREAD_SIZE;
} }
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
#define __cpuid native_cpuid #define __cpuid native_cpuid
...@@ -590,7 +590,7 @@ static inline void load_sp0(unsigned long sp0) ...@@ -590,7 +590,7 @@ static inline void load_sp0(unsigned long sp0)
} }
#define set_iopl_mask native_set_iopl_mask #define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */ /* Free all resources held by a thread. */
extern void release_thread(struct task_struct *); extern void release_thread(struct task_struct *);
......
...@@ -146,7 +146,7 @@ static inline int v8086_mode(struct pt_regs *regs) ...@@ -146,7 +146,7 @@ static inline int v8086_mode(struct pt_regs *regs)
static inline bool user_64bit_mode(struct pt_regs *regs) static inline bool user_64bit_mode(struct pt_regs *regs)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT_XXL
/* /*
* On non-paravirt systems, this is the only long mode CPL 3 * On non-paravirt systems, this is the only long mode CPL 3
* selector. We do not allow long mode selectors in the LDT. * selector. We do not allow long mode selectors in the LDT.
......
...@@ -210,7 +210,7 @@ ...@@ -210,7 +210,7 @@
#endif #endif
#ifndef CONFIG_PARAVIRT #ifndef CONFIG_PARAVIRT_XXL
# define get_kernel_rpl() 0 # define get_kernel_rpl() 0
#endif #endif
......
...@@ -141,7 +141,7 @@ static inline unsigned long __read_cr4(void) ...@@ -141,7 +141,7 @@ static inline unsigned long __read_cr4(void)
return native_read_cr4(); return native_read_cr4();
} }
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
...@@ -208,7 +208,7 @@ static inline void load_gs_index(unsigned selector) ...@@ -208,7 +208,7 @@ static inline void load_gs_index(unsigned selector)
#endif #endif
#endif/* CONFIG_PARAVIRT */ #endif /* CONFIG_PARAVIRT_XXL */
static inline void clflush(volatile void *__p) static inline void clflush(volatile void *__p)
{ {
......
...@@ -598,7 +598,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, ...@@ -598,7 +598,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN); BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */ /* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len); memcpy(insnbuf, p->instr, p->len);
used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, used = pv_ops.init.patch(p->instrtype, insnbuf,
(unsigned long)p->instr, p->len); (unsigned long)p->instr, p->len);
BUG_ON(used > p->len); BUG_ON(used > p->len);
......
...@@ -64,15 +64,12 @@ void common(void) { ...@@ -64,15 +64,12 @@ void common(void) {
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
#endif #endif
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
BLANK(); BLANK();
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif #endif
#ifdef CONFIG_XEN #ifdef CONFIG_XEN
......
...@@ -21,10 +21,13 @@ static char syscalls_ia32[] = { ...@@ -21,10 +21,13 @@ static char syscalls_ia32[] = {
int main(void) int main(void)
{ {
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); #ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
cpu.usergs_sysret64);
OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs);
#ifdef CONFIG_DEBUG_ENTRY #ifdef CONFIG_DEBUG_ENTRY
OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
#endif
#endif #endif
BLANK(); BLANK();
#endif #endif
......
...@@ -1243,10 +1243,10 @@ static void generic_identify(struct cpuinfo_x86 *c) ...@@ -1243,10 +1243,10 @@ static void generic_identify(struct cpuinfo_x86 *c)
* ESPFIX issue, we can change this. * ESPFIX issue, we can change this.
*/ */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
# ifdef CONFIG_PARAVIRT # ifdef CONFIG_PARAVIRT_XXL
do { do {
extern void native_iret(void); extern void native_iret(void);
if (pv_cpu_ops.iret == native_iret) if (pv_ops.cpu.iret == native_iret)
set_cpu_bug(c, X86_BUG_ESPFIX); set_cpu_bug(c, X86_BUG_ESPFIX);
} while (0); } while (0);
# else # else
......
...@@ -199,6 +199,16 @@ static unsigned long hv_get_tsc_khz(void) ...@@ -199,6 +199,16 @@ static unsigned long hv_get_tsc_khz(void)
return freq / 1000; return freq / 1000;
} }
#if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV)
static void __init hv_smp_prepare_boot_cpu(void)
{
native_smp_prepare_boot_cpu();
#if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS)
hv_init_spinlocks();
#endif
}
#endif
static void __init ms_hyperv_init_platform(void) static void __init ms_hyperv_init_platform(void)
{ {
int hv_host_info_eax; int hv_host_info_eax;
...@@ -303,6 +313,10 @@ static void __init ms_hyperv_init_platform(void) ...@@ -303,6 +313,10 @@ static void __init ms_hyperv_init_platform(void)
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
alloc_intr_gate(HYPERV_STIMER0_VECTOR, alloc_intr_gate(HYPERV_STIMER0_VECTOR,
hv_stimer0_callback_vector); hv_stimer0_callback_vector);
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
# endif
#endif #endif
} }
......
...@@ -97,14 +97,14 @@ static void __init vmware_sched_clock_setup(void) ...@@ -97,14 +97,14 @@ static void __init vmware_sched_clock_setup(void)
d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
d->cyc2ns_shift); d->cyc2ns_shift);
pv_time_ops.sched_clock = vmware_sched_clock; pv_ops.time.sched_clock = vmware_sched_clock;
pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset); pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
} }
static void __init vmware_paravirt_ops_setup(void) static void __init vmware_paravirt_ops_setup(void)
{ {
pv_info.name = "VMware hypervisor"; pv_info.name = "VMware hypervisor";
pv_cpu_ops.io_delay = paravirt_nop; pv_ops.cpu.io_delay = paravirt_nop;
if (vmware_tsc_khz && vmw_sched_clock) if (vmware_tsc_khz && vmw_sched_clock)
vmware_sched_clock_setup(); vmware_sched_clock_setup();
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg #define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
......
...@@ -283,7 +283,7 @@ static void __init paravirt_ops_setup(void) ...@@ -283,7 +283,7 @@ static void __init paravirt_ops_setup(void)
pv_info.name = "KVM"; pv_info.name = "KVM";
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
pv_cpu_ops.io_delay = kvm_io_delay; pv_ops.cpu.io_delay = kvm_io_delay;
#ifdef CONFIG_X86_IO_APIC #ifdef CONFIG_X86_IO_APIC
no_timer_check = 1; no_timer_check = 1;
...@@ -632,14 +632,14 @@ static void __init kvm_guest_init(void) ...@@ -632,14 +632,14 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1; has_steal_clock = 1;
pv_time_ops.steal_clock = kvm_steal_clock; pv_ops.time.steal_clock = kvm_steal_clock;
} }
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) && !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_mmu_ops.tlb_remove_table = tlb_remove_table; pv_ops.mmu.tlb_remove_table = tlb_remove_table;
} }
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
...@@ -850,13 +850,14 @@ void __init kvm_spinlock_init(void) ...@@ -850,13 +850,14 @@ void __init kvm_spinlock_init(void)
return; return;
__pv_init_lock_hash(); __pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_ops.lock.queued_spin_unlock =
pv_lock_ops.wait = kvm_wait; PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.kick = kvm_kick_cpu; pv_ops.lock.wait = kvm_wait;
pv_ops.lock.kick = kvm_kick_cpu;
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
pv_lock_ops.vcpu_is_preempted = pv_ops.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
} }
} }
......
...@@ -118,13 +118,13 @@ static u64 kvm_sched_clock_read(void) ...@@ -118,13 +118,13 @@ static u64 kvm_sched_clock_read(void)
static inline void kvm_sched_clock_init(bool stable) static inline void kvm_sched_clock_init(bool stable)
{ {
if (!stable) { if (!stable) {
pv_time_ops.sched_clock = kvm_clock_read; pv_ops.time.sched_clock = kvm_clock_read;
clear_sched_clock_stable(); clear_sched_clock_stable();
return; return;
} }
kvm_sched_clock_offset = kvm_clock_read(); kvm_sched_clock_offset = kvm_clock_read();
pv_time_ops.sched_clock = kvm_sched_clock_read; pv_ops.time.sched_clock = kvm_sched_clock_read;
pr_info("kvm-clock: using sched offset of %llu cycles", pr_info("kvm-clock: using sched offset of %llu cycles",
kvm_sched_clock_offset); kvm_sched_clock_offset);
......
...@@ -17,7 +17,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); ...@@ -17,7 +17,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
bool pv_is_native_spin_unlock(void) bool pv_is_native_spin_unlock(void)
{ {
return pv_lock_ops.queued_spin_unlock.func == return pv_ops.lock.queued_spin_unlock.func ==
__raw_callee_save___native_queued_spin_unlock; __raw_callee_save___native_queued_spin_unlock;
} }
...@@ -29,17 +29,6 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); ...@@ -29,17 +29,6 @@ PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted);
bool pv_is_native_vcpu_is_preempted(void) bool pv_is_native_vcpu_is_preempted(void)
{ {
return pv_lock_ops.vcpu_is_preempted.func == return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted; __raw_callee_save___native_vcpu_is_preempted;
} }
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
.wait = paravirt_nop,
.kick = paravirt_nop,
.vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
...@@ -81,10 +81,8 @@ struct branch { ...@@ -81,10 +81,8 @@ struct branch {
u32 delta; u32 delta;
} __attribute__((packed)); } __attribute__((packed));
unsigned paravirt_patch_call(void *insnbuf, static unsigned paravirt_patch_call(void *insnbuf, const void *target,
const void *target, u16 tgt_clobbers, unsigned long addr, unsigned len)
unsigned long addr, u16 site_clobbers,
unsigned len)
{ {
struct branch *b = insnbuf; struct branch *b = insnbuf;
unsigned long delta = (unsigned long)target - (addr+5); unsigned long delta = (unsigned long)target - (addr+5);
...@@ -103,7 +101,8 @@ unsigned paravirt_patch_call(void *insnbuf, ...@@ -103,7 +101,8 @@ unsigned paravirt_patch_call(void *insnbuf,
return 5; return 5;
} }
unsigned paravirt_patch_jmp(void *insnbuf, const void *target, #ifdef CONFIG_PARAVIRT_XXL
static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
struct branch *b = insnbuf; struct branch *b = insnbuf;
...@@ -121,6 +120,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, ...@@ -121,6 +120,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
return 5; return 5;
} }
#endif
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
...@@ -130,29 +130,14 @@ void __init native_pv_lock_init(void) ...@@ -130,29 +130,14 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key); static_branch_disable(&virt_spin_lock_key);
} }
/* unsigned paravirt_patch_default(u8 type, void *insnbuf,
unsigned long addr, unsigned len)
{
/*
* Neat trick to map patch type back to the call within the * Neat trick to map patch type back to the call within the
* corresponding structure. * corresponding structure.
*/ */
static void *get_call_destination(u8 type) void *opfunc = *((void **)&pv_ops + type);
{
struct paravirt_patch_template tmpl = {
.pv_init_ops = pv_init_ops,
.pv_time_ops = pv_time_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
.pv_mmu_ops = pv_mmu_ops,
#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
#endif
};
return *((void **)&tmpl + type);
}
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len)
{
void *opfunc = get_call_destination(type);
unsigned ret; unsigned ret;
if (opfunc == NULL) if (opfunc == NULL)
...@@ -167,15 +152,15 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ...@@ -167,15 +152,15 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
else if (opfunc == _paravirt_ident_64) else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insnbuf, len); ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || #ifdef CONFIG_PARAVIRT_XXL
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */ /* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
#endif
else else
/* Otherwise call the function; assume target could /* Otherwise call the function. */
clobber any caller-save reg */ ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
addr, clobbers, len);
return ret; return ret;
} }
...@@ -281,6 +266,7 @@ void paravirt_flush_lazy_mmu(void) ...@@ -281,6 +266,7 @@ void paravirt_flush_lazy_mmu(void)
preempt_enable(); preempt_enable();
} }
#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev) void paravirt_start_context_switch(struct task_struct *prev)
{ {
BUG_ON(preemptible()); BUG_ON(preemptible());
...@@ -301,6 +287,7 @@ void paravirt_end_context_switch(struct task_struct *next) ...@@ -301,6 +287,7 @@ void paravirt_end_context_switch(struct task_struct *next)
if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
arch_enter_lazy_mmu_mode(); arch_enter_lazy_mmu_mode();
} }
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void) enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{ {
...@@ -312,85 +299,16 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) ...@@ -312,85 +299,16 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
struct pv_info pv_info = { struct pv_info pv_info = {
.name = "bare hardware", .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
.kernel_rpl = 0, .kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.extra_user_64bit_cs = __USER_CS, .extra_user_64bit_cs = __USER_CS,
#endif #endif
};
struct pv_init_ops pv_init_ops = {
.patch = native_patch,
};
struct pv_time_ops pv_time_ops = {
.sched_clock = native_sched_clock,
.steal_clock = native_steal_clock,
};
__visible struct pv_irq_ops pv_irq_ops = {
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
};
__visible struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
.write_cr8 = native_write_cr8,
#endif #endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr,
.write_msr = native_write_msr,
.read_msr_safe = native_read_msr_safe,
.write_msr_safe = native_write_msr_safe,
.read_pmc = native_read_pmc,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
.load_gs_index = native_load_gs_index,
#endif
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
.alloc_ldt = paravirt_nop,
.free_ldt = paravirt_nop,
.load_sp0 = native_load_sp0,
#ifdef CONFIG_X86_64
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
.swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
.start_context_switch = paravirt_nop,
.end_context_switch = paravirt_nop,
}; };
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */ /* 32-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
...@@ -399,85 +317,171 @@ NOKPROBE_SYMBOL(native_load_idt); ...@@ -399,85 +317,171 @@ NOKPROBE_SYMBOL(native_load_idt);
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif #endif
struct pv_mmu_ops pv_mmu_ops __ro_after_init = { struct paravirt_patch_template pv_ops = {
/* Init ops. */
.init.patch = native_patch,
.read_cr2 = native_read_cr2, /* Time ops. */
.write_cr2 = native_write_cr2, .time.sched_clock = native_sched_clock,
.read_cr3 = __native_read_cr3, .time.steal_clock = native_steal_clock,
.write_cr3 = native_write_cr3,
.flush_tlb_user = native_flush_tlb, /* Cpu ops. */
.flush_tlb_kernel = native_flush_tlb_global, .cpu.io_delay = native_io_delay,
.flush_tlb_one_user = native_flush_tlb_one_user,
.flush_tlb_others = native_flush_tlb_others,
.tlb_remove_table = (void (*)(struct mmu_gather *, void *))tlb_remove_page,
.pgd_alloc = __paravirt_pgd_alloc, #ifdef CONFIG_PARAVIRT_XXL
.pgd_free = paravirt_nop, .cpu.cpuid = native_cpuid,
.cpu.get_debugreg = native_get_debugreg,
.cpu.set_debugreg = native_set_debugreg,
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.cpu.read_cr8 = native_read_cr8,
.cpu.write_cr8 = native_write_cr8,
#endif
.cpu.wbinvd = native_wbinvd,
.cpu.read_msr = native_read_msr,
.cpu.write_msr = native_write_msr,
.cpu.read_msr_safe = native_read_msr_safe,
.cpu.write_msr_safe = native_write_msr_safe,
.cpu.read_pmc = native_read_pmc,
.cpu.load_tr_desc = native_load_tr_desc,
.cpu.set_ldt = native_set_ldt,
.cpu.load_gdt = native_load_gdt,
.cpu.load_idt = native_load_idt,
.cpu.store_tr = native_store_tr,
.cpu.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
.cpu.load_gs_index = native_load_gs_index,
#endif
.cpu.write_ldt_entry = native_write_ldt_entry,
.cpu.write_gdt_entry = native_write_gdt_entry,
.cpu.write_idt_entry = native_write_idt_entry,
.alloc_pte = paravirt_nop, .cpu.alloc_ldt = paravirt_nop,
.alloc_pmd = paravirt_nop, .cpu.free_ldt = paravirt_nop,
.alloc_pud = paravirt_nop,
.alloc_p4d = paravirt_nop,
.release_pte = paravirt_nop,
.release_pmd = paravirt_nop,
.release_pud = paravirt_nop,
.release_p4d = paravirt_nop,
.set_pte = native_set_pte, .cpu.load_sp0 = native_load_sp0,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
.ptep_modify_prot_start = __ptep_modify_prot_start, #ifdef CONFIG_X86_64
.ptep_modify_prot_commit = __ptep_modify_prot_commit, .cpu.usergs_sysret64 = native_usergs_sysret64,
#endif
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
.cpu.set_iopl_mask = native_set_iopl_mask,
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.irq.safe_halt = native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
/* Mmu ops. */
.mmu.flush_tlb_user = native_flush_tlb,
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_others = native_flush_tlb_others,
.mmu.tlb_remove_table =
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = native_read_cr2,
.mmu.write_cr2 = native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
.mmu.pgd_alloc = __paravirt_pgd_alloc,
.mmu.pgd_free = paravirt_nop,
.mmu.alloc_pte = paravirt_nop,
.mmu.alloc_pmd = paravirt_nop,
.mmu.alloc_pud = paravirt_nop,
.mmu.alloc_p4d = paravirt_nop,
.mmu.release_pte = paravirt_nop,
.mmu.release_pmd = paravirt_nop,
.mmu.release_pud = paravirt_nop,
.mmu.release_p4d = paravirt_nop,
.mmu.set_pte = native_set_pte,
.mmu.set_pte_at = native_set_pte_at,
.mmu.set_pmd = native_set_pmd,
.mmu.ptep_modify_prot_start = __ptep_modify_prot_start,
.mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit,
#if CONFIG_PGTABLE_LEVELS >= 3 #if CONFIG_PGTABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic, .mmu.set_pte_atomic = native_set_pte_atomic,
.pte_clear = native_pte_clear, .mmu.pte_clear = native_pte_clear,
.pmd_clear = native_pmd_clear, .mmu.pmd_clear = native_pmd_clear,
#endif #endif
.set_pud = native_set_pud, .mmu.set_pud = native_set_pud,
.pmd_val = PTE_IDENT, .mmu.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT, .mmu.make_pmd = PTE_IDENT,
#if CONFIG_PGTABLE_LEVELS >= 4 #if CONFIG_PGTABLE_LEVELS >= 4
.pud_val = PTE_IDENT, .mmu.pud_val = PTE_IDENT,
.make_pud = PTE_IDENT, .mmu.make_pud = PTE_IDENT,
.set_p4d = native_set_p4d, .mmu.set_p4d = native_set_p4d,
#if CONFIG_PGTABLE_LEVELS >= 5 #if CONFIG_PGTABLE_LEVELS >= 5
.p4d_val = PTE_IDENT, .mmu.p4d_val = PTE_IDENT,
.make_p4d = PTE_IDENT, .mmu.make_p4d = PTE_IDENT,
.set_pgd = native_set_pgd, .mmu.set_pgd = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ #endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */ #endif /* CONFIG_PGTABLE_LEVELS >= 4 */
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ #endif /* CONFIG_PGTABLE_LEVELS >= 3 */
.pte_val = PTE_IDENT, .mmu.pte_val = PTE_IDENT,
.pgd_val = PTE_IDENT, .mmu.pgd_val = PTE_IDENT,
.make_pte = PTE_IDENT, .mmu.make_pte = PTE_IDENT,
.make_pgd = PTE_IDENT, .mmu.make_pgd = PTE_IDENT,
.dup_mmap = paravirt_nop, .mmu.dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop, .mmu.activate_mm = paravirt_nop,
.activate_mm = paravirt_nop,
.lazy_mode = { .mmu.lazy_mode = {
.enter = paravirt_nop, .enter = paravirt_nop,
.leave = paravirt_nop, .leave = paravirt_nop,
.flush = paravirt_nop, .flush = paravirt_nop,
}, },
.set_fixmap = native_set_fixmap, .mmu.set_fixmap = native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
/* Lock ops. */
#ifdef CONFIG_SMP
.lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
.lock.queued_spin_unlock =
PV_CALLEE_SAVE(__native_queued_spin_unlock),
.lock.wait = paravirt_nop,
.lock.kick = paravirt_nop,
.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
}; };
EXPORT_SYMBOL_GPL(pv_time_ops); #ifdef CONFIG_PARAVIRT_XXL
EXPORT_SYMBOL (pv_cpu_ops); /* At this point, native_get/set_debugreg has real function entries */
EXPORT_SYMBOL (pv_mmu_ops); NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#endif
EXPORT_SYMBOL_GPL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info); EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
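With the former per-area ops instances folded into the single pv_ops structure initialised above, a hypervisor back end overrides individual members rather than assigning whole structures. A minimal sketch of that pattern follows; demo_guest_init and demo_steal_clock are hypothetical names used only for illustration, not part of this series:

/* Illustrative only: a hypothetical back end overriding one pv_ops member
 * at early boot; pv_ops is the structure defined by this series. */
static u64 demo_steal_clock(int cpu)
{
	return 0;		/* this sketch reports no steal time */
}

static void __init demo_guest_init(void)
{
	pv_ops.time.steal_clock = demo_steal_clock;
}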
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h> #include <asm/paravirt.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); #ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(irq, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_cpu_ops, iret, "iret"); DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(cpu, iret, "iret");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS) #if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
#endif #endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
...@@ -30,53 +32,42 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ...@@ -30,53 +32,42 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_spin_unlock(void);
extern bool pv_is_native_vcpu_is_preempted(void); extern bool pv_is_native_vcpu_is_preempted(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
unsigned long addr, unsigned len)
{ {
const unsigned char *start, *end;
unsigned ret;
#define PATCH_SITE(ops, x) \ #define PATCH_SITE(ops, x) \
case PARAVIRT_PATCH(ops.x): \ case PARAVIRT_PATCH(ops.x): \
start = start_##ops##_##x; \ return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
end = end_##ops##_##x; \
goto patch_site
switch (type) { switch (type) {
PATCH_SITE(pv_irq_ops, irq_disable); #ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(irq, irq_disable);
PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(irq, irq_enable);
PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(irq, restore_fl);
PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(irq, save_fl);
PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(cpu, iret);
PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(mmu, read_cr2);
PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS) #if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock()) { if (pv_is_native_spin_unlock())
start = start_pv_lock_ops_queued_spin_unlock; return paravirt_patch_insns(ibuf, len,
end = end_pv_lock_ops_queued_spin_unlock; start_lock_queued_spin_unlock,
goto patch_site; end_lock_queued_spin_unlock);
} break;
goto patch_default;
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): case PARAVIRT_PATCH(lock.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted()) { if (pv_is_native_vcpu_is_preempted())
start = start_pv_lock_ops_vcpu_is_preempted; return paravirt_patch_insns(ibuf, len,
end = end_pv_lock_ops_vcpu_is_preempted; start_lock_vcpu_is_preempted,
goto patch_site; end_lock_vcpu_is_preempted);
} break;
goto patch_default;
#endif #endif
default: default:
patch_default: __maybe_unused
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end);
break; break;
} }
#undef PATCH_SITE #undef PATCH_SITE
return ret; return paravirt_patch_default(type, ibuf, addr, len);
} }
...@@ -3,24 +3,26 @@ ...@@ -3,24 +3,26 @@
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <linux/stringify.h> #include <linux/stringify.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); #ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
DEF_NATIVE(cpu, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(cpu, swapgs, "swapgs");
#endif
DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax"); DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS) #if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
#endif #endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
...@@ -38,55 +40,44 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ...@@ -38,55 +40,44 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_spin_unlock(void);
extern bool pv_is_native_vcpu_is_preempted(void); extern bool pv_is_native_vcpu_is_preempted(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
unsigned long addr, unsigned len)
{ {
const unsigned char *start, *end;
unsigned ret;
#define PATCH_SITE(ops, x) \ #define PATCH_SITE(ops, x) \
case PARAVIRT_PATCH(ops.x): \ case PARAVIRT_PATCH(ops.x): \
start = start_##ops##_##x; \ return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
end = end_##ops##_##x; \
goto patch_site
switch(type) {
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
start = start_pv_lock_ops_queued_spin_unlock;
end = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
goto patch_default;
case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): switch (type) {
if (pv_is_native_vcpu_is_preempted()) { #ifdef CONFIG_PARAVIRT_XXL
start = start_pv_lock_ops_vcpu_is_preempted; PATCH_SITE(irq, restore_fl);
end = end_pv_lock_ops_vcpu_is_preempted; PATCH_SITE(irq, save_fl);
goto patch_site; PATCH_SITE(irq, irq_enable);
} PATCH_SITE(irq, irq_disable);
goto patch_default; PATCH_SITE(cpu, usergs_sysret64);
PATCH_SITE(cpu, swapgs);
PATCH_SITE(cpu, wbinvd);
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
#endif #endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock())
return paravirt_patch_insns(ibuf, len,
start_lock_queued_spin_unlock,
end_lock_queued_spin_unlock);
break;
default: case PARAVIRT_PATCH(lock.vcpu_is_preempted):
patch_default: __maybe_unused if (pv_is_native_vcpu_is_preempted())
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); return paravirt_patch_insns(ibuf, len,
start_lock_vcpu_is_preempted,
end_lock_vcpu_is_preempted);
break; break;
#endif
patch_site: default:
ret = paravirt_patch_insns(ibuf, len, start, end);
break; break;
} }
#undef PATCH_SITE #undef PATCH_SITE
return ret; return paravirt_patch_default(type, ibuf, addr, len);
} }
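Both the 32-bit and 64-bit native_patch() variants above now return the result of paravirt_patch_insns() directly from each PATCH_SITE case and fall through to paravirt_patch_default() when no native template applies (or when the pv spinlock ops are not the native ones). The underlying copy step can be sketched as below; the byte array and helper name are hypothetical and are not the kernel's DEF_NATIVE machinery:

#include <string.h>

/* Sketch: copy a short native instruction sequence over a patch site if it
 * fits, otherwise leave the indirect pv_ops call in place. */
static const unsigned char native_cli[] = { 0xfa };	/* "cli" */

static unsigned int demo_patch_site(void *ibuf, unsigned int len)
{
	if (sizeof(native_cli) > len)
		return 0;			/* does not fit: keep the call */
	memcpy(ibuf, native_cli, sizeof(native_cli));
	return sizeof(native_cli);		/* bytes actually patched */
}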
...@@ -247,7 +247,7 @@ unsigned long long sched_clock(void) ...@@ -247,7 +247,7 @@ unsigned long long sched_clock(void)
bool using_native_sched_clock(void) bool using_native_sched_clock(void)
{ {
return pv_time_ops.sched_clock == native_sched_clock; return pv_ops.time.sched_clock == native_sched_clock;
} }
#else #else
unsigned long long unsigned long long
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#define TOPOLOGY_REGISTER_OFFSET 0x10 #define TOPOLOGY_REGISTER_OFFSET 0x10
#if defined CONFIG_PCI && defined CONFIG_PARAVIRT #if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL
/* /*
* Interrupt control on vSMPowered systems: * Interrupt control on vSMPowered systems:
* ~AC is a shadow of IF. If IF is 'on' AC should be 'off' * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
...@@ -69,17 +69,17 @@ asmlinkage __visible void vsmp_irq_enable(void) ...@@ -69,17 +69,17 @@ asmlinkage __visible void vsmp_irq_enable(void)
} }
PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, static unsigned __init vsmp_patch(u8 type, void *ibuf,
unsigned long addr, unsigned len) unsigned long addr, unsigned len)
{ {
switch (type) { switch (type) {
case PARAVIRT_PATCH(pv_irq_ops.irq_enable): case PARAVIRT_PATCH(irq.irq_enable):
case PARAVIRT_PATCH(pv_irq_ops.irq_disable): case PARAVIRT_PATCH(irq.irq_disable):
case PARAVIRT_PATCH(pv_irq_ops.save_fl): case PARAVIRT_PATCH(irq.save_fl):
case PARAVIRT_PATCH(pv_irq_ops.restore_fl): case PARAVIRT_PATCH(irq.restore_fl):
return paravirt_patch_default(type, clobbers, ibuf, addr, len); return paravirt_patch_default(type, ibuf, addr, len);
default: default:
return native_patch(type, clobbers, ibuf, addr, len); return native_patch(type, ibuf, addr, len);
} }
} }
...@@ -111,11 +111,11 @@ static void __init set_vsmp_pv_ops(void) ...@@ -111,11 +111,11 @@ static void __init set_vsmp_pv_ops(void)
if (cap & ctl & (1 << 4)) { if (cap & ctl & (1 << 4)) {
/* Setup irq ops and turn on vSMP IRQ fastpath handling */ /* Setup irq ops and turn on vSMP IRQ fastpath handling */
pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
pv_init_ops.patch = vsmp_patch; pv_ops.init.patch = vsmp_patch;
ctl &= ~(1 << 4); ctl &= ~(1 << 4);
} }
writel(ctl, address + 4); writel(ctl, address + 4);
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
* be extended when new paravirt and debugging variants are added.) * be extended when new paravirt and debugging variants are added.)
*/ */
#undef CONFIG_PARAVIRT #undef CONFIG_PARAVIRT
#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_PARAVIRT_SPINLOCKS
#include <linux/kernel.h> #include <linux/kernel.h>
......
...@@ -18,6 +18,7 @@ config XEN_PV ...@@ -18,6 +18,7 @@ config XEN_PV
bool "Xen PV guest support" bool "Xen PV guest support"
default y default y
depends on XEN depends on XEN
select PARAVIRT_XXL
select XEN_HAVE_PVMMU select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU select XEN_HAVE_VPMU
help help
......
...@@ -14,23 +14,44 @@ nostackp := $(call cc-option, -fno-stack-protector) ...@@ -14,23 +14,44 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten_pv.o := $(nostackp) CFLAGS_enlighten_pv.o := $(nostackp)
CFLAGS_mmu_pv.o := $(nostackp) CFLAGS_mmu_pv.o := $(nostackp)
obj-y := enlighten.o multicalls.o mmu.o irq.o \ obj-y += enlighten.o
time.o xen-asm.o xen-asm_$(BITS).o \ obj-y += mmu.o
grant-table.o suspend.o platform-pci-unplug.o obj-y += time.o
obj-y += grant-table.o
obj-y += suspend.o
obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o
obj-$(CONFIG_XEN_PVHVM) += mmu_hvm.o
obj-$(CONFIG_XEN_PVHVM) += suspend_hvm.o
obj-$(CONFIG_XEN_PVHVM) += platform-pci-unplug.o
obj-$(CONFIG_XEN_PV) += setup.o
obj-$(CONFIG_XEN_PV) += apic.o
obj-$(CONFIG_XEN_PV) += pmu.o
obj-$(CONFIG_XEN_PV) += suspend_pv.o
obj-$(CONFIG_XEN_PV) += p2m.o
obj-$(CONFIG_XEN_PV) += enlighten_pv.o
obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
obj-$(CONFIG_XEN_PV) += multicalls.o
obj-$(CONFIG_XEN_PV) += xen-asm.o
obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o mmu_hvm.o suspend_hvm.o
obj-$(CONFIG_XEN_PV) += setup.o apic.o pmu.o suspend_pv.o \
p2m.o enlighten_pv.o mmu_pv.o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
obj-$(CONFIG_XEN_PVH) += xen-pvh.o
obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o
obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_XEN_DOM0) += vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_EFI) += efi.o
obj-$(CONFIG_XEN_PVH) += xen-pvh.o
// SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (c) 2014 Oracle Co., Daniel Kiper * Copyright (c) 2014 Oracle Co., Daniel Kiper
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <linux/bitops.h> #include <linux/bitops.h>
......
// SPDX-License-Identifier: GPL-2.0
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
#include <linux/bootmem.h> #include <linux/bootmem.h>
#endif #endif
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/kexec.h> #include <linux/kexec.h>
......
...@@ -995,11 +995,14 @@ void __init xen_setup_vcpu_info_placement(void) ...@@ -995,11 +995,14 @@ void __init xen_setup_vcpu_info_placement(void)
* percpu area for all cpus, so make use of it. * percpu area for all cpus, so make use of it.
*/ */
if (xen_have_vcpu_info_placement) { if (xen_have_vcpu_info_placement) {
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_ops.irq.restore_fl =
pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_ops.irq.irq_disable =
pv_mmu_ops.read_cr2 = xen_read_cr2_direct; __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
} }
} }
...@@ -1174,14 +1177,14 @@ static void __init xen_boot_params_init_edd(void) ...@@ -1174,14 +1177,14 @@ static void __init xen_boot_params_init_edd(void)
*/ */
static void __init xen_setup_gdt(int cpu) static void __init xen_setup_gdt(int cpu)
{ {
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot; pv_ops.cpu.load_gdt = xen_load_gdt_boot;
setup_stack_canary_segment(cpu); setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu); switch_to_new_gdt(cpu);
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
pv_cpu_ops.load_gdt = xen_load_gdt; pv_ops.cpu.load_gdt = xen_load_gdt;
} }
static void __init xen_dom0_set_legacy_features(void) static void __init xen_dom0_set_legacy_features(void)
...@@ -1206,8 +1209,8 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1206,8 +1209,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */ /* Install Xen paravirt ops */
pv_info = xen_info; pv_info = xen_info;
pv_init_ops.patch = paravirt_patch_default; pv_ops.init.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops; pv_ops.cpu = xen_cpu_ops;
xen_init_irq_ops(); xen_init_irq_ops();
/* /*
...@@ -1276,8 +1279,10 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1276,8 +1279,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif #endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_ops.mmu.ptep_modify_prot_start =
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; xen_ptep_modify_prot_start;
pv_ops.mmu.ptep_modify_prot_commit =
xen_ptep_modify_prot_commit;
} }
machine_ops = xen_machine_ops; machine_ops = xen_machine_ops;
......
// SPDX-License-Identifier: GPL-2.0 OR MIT
/****************************************************************************** /******************************************************************************
* grant_table.c * grant_table.c
* x86 specific part * x86 specific part
...@@ -8,30 +9,6 @@ ...@@ -8,30 +9,6 @@
* Copyright (c) 2004-2005, K A Fraser * Copyright (c) 2004-2005, K A Fraser
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan. Split out x86 specific part. * VA Linux Systems Japan. Split out x86 specific part.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/ */
#include <linux/sched.h> #include <linux/sched.h>
......
...@@ -128,6 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { ...@@ -128,6 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void) void __init xen_init_irq_ops(void)
{ {
pv_irq_ops = xen_irq_ops; pv_ops.irq = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ; x86_init.irqs.intr_init = xen_init_IRQ;
} }
// SPDX-License-Identifier: GPL-2.0
#include <linux/pfn.h> #include <linux/pfn.h>
#include <asm/xen/page.h> #include <asm/xen/page.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
...@@ -6,12 +8,6 @@ ...@@ -6,12 +8,6 @@
#include "multicalls.h" #include "multicalls.h"
#include "mmu.h" #include "mmu.h"
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and balloon lists.
*/
DEFINE_SPINLOCK(xen_reservation_lock);
unsigned long arbitrary_virt_to_mfn(void *vaddr) unsigned long arbitrary_virt_to_mfn(void *vaddr)
{ {
xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
...@@ -42,186 +38,6 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) ...@@ -42,186 +38,6 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
} }
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
static noinline void xen_flush_tlb_all(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_TLB_FLUSH_ALL;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
#define REMAP_BATCH_SIZE 16
struct remap_data {
xen_pfn_t *pfn;
bool contiguous;
bool no_translate;
pgprot_t prot;
struct mmu_update *mmu_update;
};
static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
/*
* If we have a contiguous range, just update the pfn itself,
* else update pointer to be "next pfn".
*/
if (rmd->contiguous)
(*rmd->pfn)++;
else
rmd->pfn++;
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->ptr |= rmd->no_translate ?
MMU_PT_UPDATE_NO_TRANSLATE :
MMU_NORMAL_PT_UPDATE;
rmd->mmu_update->val = pte_val_ma(pte);
rmd->mmu_update++;
return 0;
}
static int do_remap_pfn(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *pfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid,
bool no_translate,
struct page **pages)
{
int err = 0;
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
unsigned long range;
int mapped = 0;
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
rmd.pfn = pfn;
rmd.prot = prot;
/*
* We use the err_ptr to indicate if there we are doing a contiguous
* mapping or a discontigious mapping.
*/
rmd.contiguous = !err_ptr;
rmd.no_translate = no_translate;
while (nr) {
int index = 0;
int done = 0;
int batch = min(REMAP_BATCH_SIZE, nr);
int batch_left = batch;
range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update;
err = apply_to_page_range(vma->vm_mm, addr, range,
remap_area_pfn_pte_fn, &rmd);
if (err)
goto out;
/* We record the error for each page that gives an error, but
* continue mapping until the whole set is done */
do {
int i;
err = HYPERVISOR_mmu_update(&mmu_update[index],
batch_left, &done, domid);
/*
* @err_ptr may be the same buffer as @gfn, so
* only clear it after each chunk of @gfn is
* used.
*/
if (err_ptr) {
for (i = index; i < index + done; i++)
err_ptr[i] = 0;
}
if (err < 0) {
if (!err_ptr)
goto out;
err_ptr[i] = err;
done++; /* Skip failed frame. */
} else
mapped += done;
batch_left -= done;
index += done;
} while (batch_left);
nr -= batch;
addr += range;
if (err_ptr)
err_ptr += batch;
cond_resched();
}
out:
xen_flush_tlb_all();
return err < 0 ? err : mapped;
}
int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid, struct page **pages)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
* and the consequences later is quite hard to detect what the actual
* cause of "wrong memory was mapped in".
*/
BUG_ON(err_ptr == NULL);
return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
false, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid, struct page **pages)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
true, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
/* Returns: 0 success */ /* Returns: 0 success */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages) int nr, struct page **pages)
......
...@@ -73,7 +73,7 @@ static int is_pagetable_dying_supported(void) ...@@ -73,7 +73,7 @@ static int is_pagetable_dying_supported(void)
void __init xen_hvm_init_mmu_ops(void) void __init xen_hvm_init_mmu_ops(void)
{ {
if (is_pagetable_dying_supported()) if (is_pagetable_dying_supported())
pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE #ifdef CONFIG_PROC_VMCORE
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
#endif #endif
......
// SPDX-License-Identifier: GPL-2.0
/* /*
* Xen mmu operations * Xen mmu operations
* *
...@@ -98,6 +100,12 @@ static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); ...@@ -98,6 +100,12 @@ static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and balloon lists.
*/
static DEFINE_SPINLOCK(xen_reservation_lock);
/* /*
* Note about cr3 (pagetable base) values: * Note about cr3 (pagetable base) values:
* *
...@@ -2209,7 +2217,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) ...@@ -2209,7 +2217,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
set_page_prot(initial_page_table, PAGE_KERNEL); set_page_prot(initial_page_table, PAGE_KERNEL);
set_page_prot(initial_kernel_pmd, PAGE_KERNEL); set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
pv_mmu_ops.write_cr3 = &xen_write_cr3; pv_ops.mmu.write_cr3 = &xen_write_cr3;
} }
/* /*
...@@ -2358,27 +2366,27 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) ...@@ -2358,27 +2366,27 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
static void __init xen_post_allocator_init(void) static void __init xen_post_allocator_init(void)
{ {
pv_mmu_ops.set_pte = xen_set_pte; pv_ops.mmu.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd; pv_ops.mmu.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud; pv_ops.mmu.set_pud = xen_set_pud;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
pv_mmu_ops.set_p4d = xen_set_p4d; pv_ops.mmu.set_p4d = xen_set_p4d;
#endif #endif
/* This will work as long as patching hasn't happened yet /* This will work as long as patching hasn't happened yet
(which it hasn't) */ (which it hasn't) */
pv_mmu_ops.alloc_pte = xen_alloc_pte; pv_ops.mmu.alloc_pte = xen_alloc_pte;
pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte; pv_ops.mmu.release_pte = xen_release_pte;
pv_mmu_ops.release_pmd = xen_release_pmd; pv_ops.mmu.release_pmd = xen_release_pmd;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_ops.mmu.alloc_pud = xen_alloc_pud;
pv_mmu_ops.release_pud = xen_release_pud; pv_ops.mmu.release_pud = xen_release_pud;
#endif #endif
pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
pv_mmu_ops.write_cr3 = &xen_write_cr3; pv_ops.mmu.write_cr3 = &xen_write_cr3;
#endif #endif
} }
...@@ -2466,7 +2474,7 @@ void __init xen_init_mmu_ops(void) ...@@ -2466,7 +2474,7 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_init = xen_pagetable_init; x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem; x86_init.hyper.init_after_bootmem = xen_after_bootmem;
pv_mmu_ops = xen_mmu_ops; pv_ops.mmu = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE); memset(dummy_mapping, 0xff, PAGE_SIZE);
} }
...@@ -2666,6 +2674,138 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) ...@@ -2666,6 +2674,138 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
} }
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
static noinline void xen_flush_tlb_all(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = MMUEXT_TLB_FLUSH_ALL;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
#define REMAP_BATCH_SIZE 16
struct remap_data {
xen_pfn_t *pfn;
bool contiguous;
bool no_translate;
pgprot_t prot;
struct mmu_update *mmu_update;
};
static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
/*
* If we have a contiguous range, just update the pfn itself,
* else update pointer to be "next pfn".
*/
if (rmd->contiguous)
(*rmd->pfn)++;
else
rmd->pfn++;
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->ptr |= rmd->no_translate ?
MMU_PT_UPDATE_NO_TRANSLATE :
MMU_NORMAL_PT_UPDATE;
rmd->mmu_update->val = pte_val_ma(pte);
rmd->mmu_update++;
return 0;
}
int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
unsigned int domid, bool no_translate, struct page **pages)
{
int err = 0;
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
unsigned long range;
int mapped = 0;
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
rmd.pfn = pfn;
rmd.prot = prot;
/*
* We use err_ptr to indicate whether we are doing a contiguous
* mapping or a discontiguous one.
*/
rmd.contiguous = !err_ptr;
rmd.no_translate = no_translate;
while (nr) {
int index = 0;
int done = 0;
int batch = min(REMAP_BATCH_SIZE, nr);
int batch_left = batch;
range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update;
err = apply_to_page_range(vma->vm_mm, addr, range,
remap_area_pfn_pte_fn, &rmd);
if (err)
goto out;
/*
* We record the error for each page that gives an error, but
* continue mapping until the whole set is done
*/
do {
int i;
err = HYPERVISOR_mmu_update(&mmu_update[index],
batch_left, &done, domid);
/*
* @err_ptr may be the same buffer as @gfn, so
* only clear it after each chunk of @gfn is
* used.
*/
if (err_ptr) {
for (i = index; i < index + done; i++)
err_ptr[i] = 0;
}
if (err < 0) {
if (!err_ptr)
goto out;
err_ptr[i] = err;
done++; /* Skip failed frame. */
} else
mapped += done;
batch_left -= done;
index += done;
} while (batch_left);
nr -= batch;
addr += range;
if (err_ptr)
err_ptr += batch;
cond_resched();
}
out:
xen_flush_tlb_all();
return err < 0 ? err : mapped;
}
EXPORT_SYMBOL_GPL(xen_remap_pfn);
#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_KEXEC_CORE
phys_addr_t paddr_vmcoreinfo_note(void) phys_addr_t paddr_vmcoreinfo_note(void)
{ {
......
// SPDX-License-Identifier: GPL-2.0
/* /*
* Xen leaves the responsibility for maintaining p2m mappings to the * Xen leaves the responsibility for maintaining p2m mappings to the
* guests themselves, but it must also access and update the p2m array * guests themselves, but it must also access and update the p2m array
......
// SPDX-License-Identifier: GPL-2.0
/* Glue code to lib/swiotlb-xen.c */ /* Glue code to lib/swiotlb-xen.c */
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
......
// SPDX-License-Identifier: GPL-2.0
/****************************************************************************** /******************************************************************************
* platform-pci-unplug.c * platform-pci-unplug.c
* *
* Xen platform PCI device driver * Xen platform PCI device driver
* Copyright (c) 2010, Citrix * Copyright (c) 2010, Citrix
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -31,7 +19,6 @@ ...@@ -31,7 +19,6 @@
#define XEN_PLATFORM_ERR_PROTOCOL -2 #define XEN_PLATFORM_ERR_PROTOCOL -2
#define XEN_PLATFORM_ERR_BLACKLIST -3 #define XEN_PLATFORM_ERR_BLACKLIST -3
#ifdef CONFIG_XEN_PVHVM
/* store the value of xen_emul_unplug after the unplug is done */ /* store the value of xen_emul_unplug after the unplug is done */
static int xen_platform_pci_unplug; static int xen_platform_pci_unplug;
static int xen_emul_unplug; static int xen_emul_unplug;
...@@ -215,4 +202,3 @@ static int __init parse_xen_emul_unplug(char *arg) ...@@ -215,4 +202,3 @@ static int __init parse_xen_emul_unplug(char *arg)
return 0; return 0;
} }
early_param("xen_emul_unplug", parse_xen_emul_unplug); early_param("xen_emul_unplug", parse_xen_emul_unplug);
#endif
...@@ -141,11 +141,12 @@ void __init xen_init_spinlocks(void) ...@@ -141,11 +141,12 @@ void __init xen_init_spinlocks(void)
printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
__pv_init_lock_hash(); __pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_ops.lock.queued_spin_unlock =
pv_lock_ops.wait = xen_qlock_wait; PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.kick = xen_qlock_kick; pv_ops.lock.wait = xen_qlock_wait;
pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); pv_ops.lock.kick = xen_qlock_kick;
pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
} }
static __init int xen_parse_nopvspin(char *arg) static __init int xen_parse_nopvspin(char *arg)
......
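xen_init_spinlocks() above installs the generic pv qspinlock slow path together with Xen's wait/kick callbacks. The contract behind lock.wait and lock.kick can be sketched as below; this is a hypothetical back end in kernel context, not Xen's implementation, though the signatures match the pv_lock_ops members being assigned:

/* Hypothetical back end: wait() parks a vCPU while the lock byte still
 * holds @val, kick() wakes the vCPU waiting for that lock to be released. */
static void demo_qlock_wait(u8 *ptr, u8 val)
{
	while (READ_ONCE(*ptr) == val)
		cpu_relax();	/* a real hypervisor would block the vCPU here */
}

static void demo_qlock_kick(int cpu)
{
	/* a real hypervisor would send a wakeup event to @cpu */
}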
...@@ -513,7 +513,7 @@ static void __init xen_time_init(void) ...@@ -513,7 +513,7 @@ static void __init xen_time_init(void)
void __init xen_init_time_ops(void) void __init xen_init_time_ops(void)
{ {
xen_sched_clock_offset = xen_clocksource_read(); xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops; pv_ops.time = xen_time_ops;
x86_init.timers.timer_init = xen_time_init; x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop; x86_init.timers.setup_percpu_clockev = x86_init_noop;
...@@ -555,7 +555,7 @@ void __init xen_hvm_init_time_ops(void) ...@@ -555,7 +555,7 @@ void __init xen_hvm_init_time_ops(void)
} }
xen_sched_clock_offset = xen_clocksource_read(); xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops; pv_ops.time = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init; x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Bit used for the pseudo-hwcap for non-negative segments. We use /* Bit used for the pseudo-hwcap for non-negative segments. We use
bit 1 to avoid bugs in some versions of glibc when bit 0 is bit 1 to avoid bugs in some versions of glibc when bit 0 is
used; the choice is otherwise arbitrary. */ used; the choice is otherwise arbitrary. */
......
/* SPDX-License-Identifier: GPL-2.0 */
/* /*
* Copyright C 2016, Oracle and/or its affiliates. All rights reserved. * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
.code32 .code32
......
...@@ -175,7 +175,7 @@ void __init xen_time_setup_guest(void) ...@@ -175,7 +175,7 @@ void __init xen_time_setup_guest(void)
xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_runstate_update_flag); VMASST_TYPE_runstate_update_flag);
pv_time_ops.steal_clock = xen_steal_clock; pv_ops.time.steal_clock = xen_steal_clock;
static_key_slow_inc(&paravirt_steal_enabled); static_key_slow_inc(&paravirt_steal_enabled);
if (xen_runstate_remote) if (xen_runstate_remote)
......
...@@ -89,11 +89,13 @@ unsigned irq_from_evtchn(unsigned int evtchn); ...@@ -89,11 +89,13 @@ unsigned irq_from_evtchn(unsigned int evtchn);
int irq_from_virq(unsigned int cpu, unsigned int virq); int irq_from_virq(unsigned int cpu, unsigned int virq);
unsigned int evtchn_from_irq(unsigned irq); unsigned int evtchn_from_irq(unsigned irq);
#ifdef CONFIG_XEN_PVHVM
/* Xen HVM evtchn vector callback */ /* Xen HVM evtchn vector callback */
void xen_hvm_callback_vector(void); void xen_hvm_callback_vector(void);
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
#define trace_xen_hvm_callback_vector xen_hvm_callback_vector #define trace_xen_hvm_callback_vector xen_hvm_callback_vector
#endif #endif
#endif
int xen_set_callback_via(uint64_t via); int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_evtchn_do_upcall(struct pt_regs *regs);
void xen_hvm_evtchn_do_upcall(void); void xen_hvm_evtchn_do_upcall(void);
......
...@@ -244,12 +244,6 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); ...@@ -244,12 +244,6 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
#define XENMEM_machine_memory_map 10 #define XENMEM_machine_memory_map 10
/*
* Prevent the balloon driver from changing the memory reservation
* during a driver critical region.
*/
extern spinlock_t xen_reservation_lock;
/* /*
* Unmaps the page appearing at a particular GPFN from the specified guest's * Unmaps the page appearing at a particular GPFN from the specified guest's
* pseudophysical address space. * pseudophysical address space.
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/efi.h> #include <linux/efi.h>
#include <xen/features.h>
#include <asm/xen/interface.h> #include <asm/xen/interface.h>
#include <xen/interface/vcpu.h> #include <xen/interface/vcpu.h>
...@@ -47,6 +48,10 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, ...@@ -47,6 +48,10 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
dma_addr_t *dma_handle); dma_addr_t *dma_handle);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
unsigned int domid, bool no_translate, struct page **pages);
#else #else
static inline int xen_create_contiguous_region(phys_addr_t pstart, static inline int xen_create_contiguous_region(phys_addr_t pstart,
unsigned int order, unsigned int order,
...@@ -58,10 +63,50 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart, ...@@ -58,10 +63,50 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
static inline void xen_destroy_contiguous_region(phys_addr_t pstart, static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
unsigned int order) { } unsigned int order) { }
static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr,
pgprot_t prot, unsigned int domid,
bool no_translate, struct page **pages)
{
BUG();
return 0;
}
#endif #endif
struct vm_area_struct; struct vm_area_struct;
#ifdef CONFIG_XEN_AUTO_XLATE
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid,
struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages);
#else
/*
* These two functions are called from arch/x86/xen/mmu.c and so stubs
* are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
*/
static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid,
struct page **pages)
{
return -EOPNOTSUPP;
}
static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
{
return -EOPNOTSUPP;
}
#endif
/* /*
* xen_remap_domain_gfn_array() - map an array of foreign frames by gfn * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn
* @vma: VMA to map the pages into * @vma: VMA to map the pages into
...@@ -79,12 +124,25 @@ struct vm_area_struct; ...@@ -79,12 +124,25 @@ struct vm_area_struct;
* Returns the number of successfully mapped frames, or a -ve error * Returns the number of successfully mapped frames, or a -ve error
* code. * code.
*/ */
int xen_remap_domain_gfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t *gfn, int nr, xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot, int *err_ptr, pgprot_t prot,
unsigned domid, unsigned int domid,
struct page **pages); struct page **pages)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
/* We BUG_ON because it is a programmer error to pass a NULL err_ptr;
* otherwise it is very hard to detect later which memory ended up
* wrongly mapped in.
*/
BUG_ON(err_ptr == NULL);
return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
false, pages);
}
/* /*
* xen_remap_domain_mfn_array() - map an array of foreign frames by mfn * xen_remap_domain_mfn_array() - map an array of foreign frames by mfn
...@@ -103,10 +161,18 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, ...@@ -103,10 +161,18 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
* Returns the number of successfully mapped frames, or a -ve error * Returns the number of successfully mapped frames, or a -ve error
* code. * code.
*/ */
int xen_remap_domain_mfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr, xen_pfn_t *mfn, int nr, unsigned long addr, xen_pfn_t *mfn,
int *err_ptr, pgprot_t prot, int nr, int *err_ptr,
unsigned int domid, struct page **pages); pgprot_t prot, unsigned int domid,
struct page **pages)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
true, pages);
}
/* xen_remap_domain_gfn_range() - map a range of foreign frames /* xen_remap_domain_gfn_range() - map a range of foreign frames
* @vma: VMA to map the pages into * @vma: VMA to map the pages into
...@@ -120,44 +186,21 @@ int xen_remap_domain_mfn_array(struct vm_area_struct *vma, ...@@ -120,44 +186,21 @@ int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
* Returns the number of successfully mapped frames, or a -ve error * Returns the number of successfully mapped frames, or a -ve error
* code. * code.
*/ */
int xen_remap_domain_gfn_range(struct vm_area_struct *vma, static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long addr,
xen_pfn_t gfn, int nr, xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid, pgprot_t prot, unsigned int domid,
struct page **pages);
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages);
#ifdef CONFIG_XEN_AUTO_XLATE
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages);
#else
/*
* These two functions are called from arch/x86/xen/mmu.c and so stubs
* are needed for a configuration not specifying CONFIG_XEN_AUTO_XLATE.
*/
static inline int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned int domid,
struct page **pages) struct page **pages)
{ {
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP; return -EOPNOTSUPP;
}
static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
int nr, struct page **pages) pages);
{
return -EOPNOTSUPP;
} }
#endif
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages);
int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr,
unsigned long nr_grant_frames); unsigned long nr_grant_frames);
......
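The gfn/mfn remap helpers in this header are now thin static inline wrappers around the single xen_remap_pfn() exported from mmu_pv.c. A hypothetical driver-side caller of the gfn-array wrapper might look like the sketch below; the names and error handling are illustrative, and NULL is passed for pages because the PV path shown above does not dereference that argument:

/* Hypothetical caller: map @nr foreign frames owned by @domid into a VMA.
 * Per-frame errors are reported through the errs[] array. */
static int demo_map_foreign_frames(struct vm_area_struct *vma,
				   xen_pfn_t *gfns, int nr, unsigned int domid)
{
	int *errs = kcalloc(nr, sizeof(*errs), GFP_KERNEL);
	int mapped;

	if (!errs)
		return -ENOMEM;

	mapped = xen_remap_domain_gfn_array(vma, vma->vm_start, gfns, nr,
					    errs, vma->vm_page_prot, domid,
					    NULL);
	kfree(errs);
	return mapped < 0 ? mapped : 0;
}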