提交 c9bed1cf 编写于 作者: L Linus Torvalds

Merge tag 'for-linus-4.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from David Vrabel:
 "Xen features and fixes for 4.5-rc0:

   - Stolen ticks and PV wallclock support for arm/arm64

   - Add grant copy ioctl to gntdev device"

* tag 'for-linus-4.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/gntdev: add ioctl for grant copy
  x86/xen: don't reset vcpu_info on a cancelled suspend
  xen/gntdev: constify mmu_notifier_ops structures
  xen/grant-table: constify gnttab_ops structure
  xen/time: use READ_ONCE
  xen/x86: convert remaining timespec to timespec64 in xen_pvclock_gtod_notify
  xen/x86: support XENPF_settime64
  xen/arm: set the system time in Xen via the XENPF_settime64 hypercall
  xen/arm: introduce xen_read_wallclock
  arm: extend pvclock_wall_clock with sec_hi
  xen: introduce XENPF_settime64
  xen/arm: introduce HYPERVISOR_platform_op on arm and arm64
  xen: rename dom0_op to platform_op
  xen/arm: account for stolen ticks
  arm64: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
  arm: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
  missing include asm/paravirt.h in cputime.c
  xen: move xen_setup_runstate_info and get_runstate_snapshot to drivers/xen/time.c
......@@ -1824,6 +1824,25 @@ config SWIOTLB
config IOMMU_HELPER
def_bool SWIOTLB
config PARAVIRT
bool "Enable paravirtualization code"
help
This changes the kernel so it can modify itself when it is run
under a hypervisor, potentially improving performance significantly
over full virtualization.
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
default n
help
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
the current vCPU is discounted from the vCPU power. To account for
that, there can be a small performance impact.
If in doubt, say N here.
config XEN_DOM0
def_bool y
depends on XEN
......@@ -1837,6 +1856,7 @@ config XEN
select ARCH_DMA_ADDR_T_64BIT
select ARM_PSCI
select SWIOTLB_XEN
select PARAVIRT
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
......
#ifndef _ASM_ARM_PARAVIRT_H
#define _ASM_ARM_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu);
};
extern struct pv_time_ops pv_time_ops;
static inline u64 paravirt_steal_clock(int cpu)
{
return pv_time_ops.steal_clock(cpu);
}
#endif
#endif
......@@ -35,6 +35,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/platform.h>
long privcmd_call(unsigned call, unsigned long a1,
unsigned long a2, unsigned long a3,
......@@ -49,6 +50,12 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
int HYPERVISOR_physdev_op(int cmd, void *arg);
int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
int HYPERVISOR_tmem_op(void *arg);
int HYPERVISOR_platform_op_raw(void *arg);
static inline int HYPERVISOR_platform_op(struct xen_platform_op *op)
{
op->interface_version = XENPF_INTERFACE_VERSION;
return HYPERVISOR_platform_op_raw(op);
}
int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr);
static inline int
......
......@@ -27,6 +27,8 @@
(hnd).p = val; \
} while (0)
#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
#ifndef __ASSEMBLY__
/* Explicitly size integers that represent pfns in the interface with
* Xen so that we can have one ABI that works for 32 and 64 bit guests.
......@@ -76,6 +78,7 @@ struct pvclock_wall_clock {
u32 version;
u32 sec;
u32 nsec;
u32 sec_hi;
} __attribute__((__packed__));
#endif
......
......@@ -81,6 +81,7 @@ obj-$(CONFIG_EFI) += efi.o
ifneq ($(CONFIG_ARCH_EBSA110),y)
obj-y += io.o
endif
obj-$(CONFIG_PARAVIRT) += paravirt.o
head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
......
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Copyright (C) 2013 Citrix Systems
*
* Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/types.h>
#include <asm/paravirt.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops pv_time_ops;
EXPORT_SYMBOL_GPL(pv_time_ops);
......@@ -12,6 +12,7 @@
#include <xen/page.h>
#include <xen/interface/sched.h>
#include <xen/xen-ops.h>
#include <asm/paravirt.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <asm/system_misc.h>
......@@ -25,6 +26,10 @@
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/console.h>
#include <linux/pvclock_gtod.h>
#include <linux/time64.h>
#include <linux/timekeeping.h>
#include <linux/timekeeper_internal.h>
#include <linux/mm.h>
......@@ -79,6 +84,83 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
static unsigned long long xen_stolen_accounting(int cpu)
{
struct vcpu_runstate_info state;
BUG_ON(cpu != smp_processor_id());
xen_get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
}
static void xen_read_wallclock(struct timespec64 *ts)
{
u32 version;
struct timespec64 now, ts_monotonic;
struct shared_info *s = HYPERVISOR_shared_info;
struct pvclock_wall_clock *wall_clock = &(s->wc);
/* get wallclock at system boot */
do {
version = wall_clock->version;
rmb(); /* fetch version before time */
now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec;
now.tv_nsec = wall_clock->nsec;
rmb(); /* fetch time before checking version */
} while ((wall_clock->version & 1) || (version != wall_clock->version));
/* time since system boot */
ktime_get_ts64(&ts_monotonic);
*ts = timespec64_add(now, ts_monotonic);
}
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
unsigned long was_set, void *priv)
{
/* Protected by the calling core code serialization */
static struct timespec64 next_sync;
struct xen_platform_op op;
struct timespec64 now, system_time;
struct timekeeper *tk = priv;
now.tv_sec = tk->xtime_sec;
now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
system_time = timespec64_add(now, tk->wall_to_monotonic);
/*
* We only take the expensive HV call when the clock was set
* or when the 11 minutes RTC synchronization time elapsed.
*/
if (!was_set && timespec64_compare(&now, &next_sync) < 0)
return NOTIFY_OK;
op.cmd = XENPF_settime64;
op.u.settime64.mbz = 0;
op.u.settime64.secs = now.tv_sec;
op.u.settime64.nsecs = now.tv_nsec;
op.u.settime64.system_time = timespec64_to_ns(&system_time);
(void)HYPERVISOR_platform_op(&op);
/*
* Move the next drift compensation time 11 minutes
* ahead. That's emulating the sync_cmos_clock() update for
* the hardware RTC.
*/
next_sync = now;
next_sync.tv_sec += 11 * 60;
return NOTIFY_OK;
}
static struct notifier_block xen_pvclock_gtod_notifier = {
.notifier_call = xen_pvclock_gtod_notify,
};
static void xen_percpu_init(void)
{
struct vcpu_register_vcpu_info info;
......@@ -104,6 +186,8 @@ static void xen_percpu_init(void)
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;
xen_setup_runstate_info(cpu);
after_register_vcpu_info:
enable_percpu_irq(xen_events_irq, 0);
put_cpu();
......@@ -271,6 +355,11 @@ static int __init xen_guest_init(void)
register_cpu_notifier(&xen_cpu_notifier);
pv_time_ops.steal_clock = xen_stolen_accounting;
static_key_slow_inc(&paravirt_steal_enabled);
if (xen_initial_domain())
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
return 0;
}
early_initcall(xen_guest_init);
......@@ -282,6 +371,11 @@ static int __init xen_pm_init(void)
pm_power_off = xen_power_off;
arm_pm_restart = xen_restart;
if (!xen_initial_domain()) {
struct timespec64 ts;
xen_read_wallclock(&ts);
do_settimeofday64(&ts);
}
return 0;
}
......@@ -307,5 +401,6 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(privcmd_call);
......@@ -89,6 +89,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
ENTRY(privcmd_call)
......
......@@ -555,6 +555,25 @@ config SECCOMP
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
config PARAVIRT
bool "Enable paravirtualization code"
help
This changes the kernel so it can modify itself when it is run
under a hypervisor, potentially improving performance significantly
over full virtualization.
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
default n
help
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
the current vCPU is discounted from the vCPU power. To account for
that, there can be a small performance impact.
If in doubt, say N here.
config XEN_DOM0
def_bool y
depends on XEN
......@@ -563,6 +582,7 @@ config XEN
bool "Xen guest support on ARM64"
depends on ARM64 && OF
select SWIOTLB_XEN
select PARAVIRT
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
......
#ifndef _ASM_ARM64_PARAVIRT_H
#define _ASM_ARM64_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops {
unsigned long long (*steal_clock)(int cpu);
};
extern struct pv_time_ops pv_time_ops;
static inline u64 paravirt_steal_clock(int cpu)
{
return pv_time_ops.steal_clock(cpu);
}
#endif
#endif
......@@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
......
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Copyright (C) 2013 Citrix Systems
*
* Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/types.h>
#include <asm/paravirt.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
struct pv_time_ops pv_time_ops;
EXPORT_SYMBOL_GPL(pv_time_ops);
......@@ -80,6 +80,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
ENTRY(privcmd_call)
......
......@@ -310,10 +310,10 @@ HYPERVISOR_mca(struct xen_mc *mc_op)
}
static inline int
HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
HYPERVISOR_platform_op(struct xen_platform_op *op)
{
platform_op->interface_version = XENPF_INTERFACE_VERSION;
return _hypercall1(int, dom0_op, platform_op);
op->interface_version = XENPF_INTERFACE_VERSION;
return _hypercall1(int, platform_op, op);
}
static inline int
......
......@@ -64,7 +64,7 @@ static u32 xen_apic_read(u32 reg)
if (reg != APIC_ID)
return 0;
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
return 0;
......
......@@ -415,7 +415,7 @@ static bool __init xen_check_mwait(void)
set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
if ((HYPERVISOR_dom0_op(&op) == 0) &&
if ((HYPERVISOR_platform_op(&op) == 0) &&
(buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
cpuid_leaf5_ecx_val = cx;
cpuid_leaf5_edx_val = dx;
......@@ -1365,7 +1365,7 @@ static void __init xen_boot_params_init_edd(void)
info->params.length = sizeof(info->params);
set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
&info->params);
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
break;
......@@ -1383,7 +1383,7 @@ static void __init xen_boot_params_init_edd(void)
op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
op.u.firmware_info.index = nr;
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
break;
mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
......@@ -1690,7 +1690,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
if (HYPERVISOR_dom0_op(&op) == 0)
if (HYPERVISOR_platform_op(&op) == 0)
boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
/* Make sure ACS will be enabled */
......
......@@ -34,7 +34,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
if (!suspend_cancelled)
xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
......
......@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
#include <linux/timekeeper_internal.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
......@@ -32,86 +33,12 @@
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
/* unused ns of stolen time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
/* return an consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
{
u64 ret;
if (BITS_PER_LONG < 64) {
u32 *p32 = (u32 *)p;
u32 h, l;
/*
* Read high then low, and then make sure high is
* still the same; this will only loop if low wraps
* and carries into high.
* XXX some clean way to make this endian-proof?
*/
do {
h = p32[1];
barrier();
l = p32[0];
barrier();
} while (p32[1] != h);
ret = (((u64)h) << 32) | l;
} else
ret = *p;
return ret;
}
/*
* Runstate accounting
*/
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
u64 state_time;
struct vcpu_runstate_info *state;
BUG_ON(preemptible());
state = this_cpu_ptr(&xen_runstate);
/*
* The runstate info is always updated by the hypervisor on
* the current CPU, so there's no need to use anything
* stronger than a compiler barrier when fetching it.
*/
do {
state_time = get64(&state->state_entry_time);
barrier();
*res = *state;
barrier();
} while (get64(&state->state_entry_time) != state_time);
}
/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}
void xen_setup_runstate_info(int cpu)
{
struct vcpu_register_runstate_memory_area area;
area.addr.v = &per_cpu(xen_runstate, cpu);
if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
cpu, &area))
BUG();
}
static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
......@@ -119,7 +46,7 @@ static void do_stolen_accounting(void)
s64 runnable, offline, stolen;
cputime_t ticks;
get_runstate_snapshot(&state);
xen_get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
......@@ -194,26 +121,46 @@ static int xen_pvclock_gtod_notify(struct notifier_block *nb,
unsigned long was_set, void *priv)
{
/* Protected by the calling core code serialization */
static struct timespec next_sync;
static struct timespec64 next_sync;
struct xen_platform_op op;
struct timespec now;
struct timespec64 now;
struct timekeeper *tk = priv;
static bool settime64_supported = true;
int ret;
now = __current_kernel_time();
now.tv_sec = tk->xtime_sec;
now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
/*
* We only take the expensive HV call when the clock was set
* or when the 11 minutes RTC synchronization time elapsed.
*/
if (!was_set && timespec_compare(&now, &next_sync) < 0)
if (!was_set && timespec64_compare(&now, &next_sync) < 0)
return NOTIFY_OK;
op.cmd = XENPF_settime;
op.u.settime.secs = now.tv_sec;
op.u.settime.nsecs = now.tv_nsec;
op.u.settime.system_time = xen_clocksource_read();
again:
if (settime64_supported) {
op.cmd = XENPF_settime64;
op.u.settime64.mbz = 0;
op.u.settime64.secs = now.tv_sec;
op.u.settime64.nsecs = now.tv_nsec;
op.u.settime64.system_time = xen_clocksource_read();
} else {
op.cmd = XENPF_settime32;
op.u.settime32.secs = now.tv_sec;
op.u.settime32.nsecs = now.tv_nsec;
op.u.settime32.system_time = xen_clocksource_read();
}
ret = HYPERVISOR_platform_op(&op);
(void)HYPERVISOR_dom0_op(&op);
if (ret == -ENOSYS && settime64_supported) {
settime64_supported = false;
goto again;
}
if (ret < 0)
return NOTIFY_BAD;
/*
* Move the next drift compensation time 11 minutes
......
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_X86) += fallback.o
obj-y += grant-table.o features.o balloon.o manage.o preempt.o
obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o
obj-y += events/
obj-y += xenbus/
......
......@@ -58,7 +58,7 @@ static int xen_acpi_notify_hypervisor_state(u8 sleep_state,
bits, val_a, val_b))
return -1;
HYPERVISOR_dom0_op(&op);
HYPERVISOR_platform_op(&op);
return 1;
}
......
......@@ -42,7 +42,7 @@ static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
struct xen_platform_op op = INIT_EFI_OP(get_time);
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
if (tm) {
......@@ -67,7 +67,7 @@ static efi_status_t xen_efi_set_time(efi_time_t *tm)
BUILD_BUG_ON(sizeof(*tm) != sizeof(efi_data(op).u.set_time));
memcpy(&efi_data(op).u.set_time, tm, sizeof(*tm));
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
return efi_data(op).status;
......@@ -79,7 +79,7 @@ static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled,
{
struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time);
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
if (tm) {
......@@ -108,7 +108,7 @@ static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
else
efi_data(op).misc |= XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY;
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
return efi_data(op).status;
......@@ -129,7 +129,7 @@ static efi_status_t xen_efi_get_variable(efi_char16_t *name,
efi_data(op).u.get_variable.size = *data_size;
set_xen_guest_handle(efi_data(op).u.get_variable.data, data);
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
*data_size = efi_data(op).u.get_variable.size;
......@@ -152,7 +152,7 @@ static efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
memcpy(&efi_data(op).u.get_next_variable_name.vendor_guid, vendor,
sizeof(*vendor));
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
*name_size = efi_data(op).u.get_next_variable_name.size;
......@@ -178,7 +178,7 @@ static efi_status_t xen_efi_set_variable(efi_char16_t *name,
efi_data(op).u.set_variable.size = data_size;
set_xen_guest_handle(efi_data(op).u.set_variable.data, data);
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
return efi_data(op).status;
......@@ -196,7 +196,7 @@ static efi_status_t xen_efi_query_variable_info(u32 attr,
efi_data(op).u.query_variable_info.attr = attr;
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
*storage_space = efi_data(op).u.query_variable_info.max_store_size;
......@@ -210,7 +210,7 @@ static efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
{
struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count);
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
*count = efi_data(op).misc;
......@@ -232,7 +232,7 @@ static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
efi_data(op).u.update_capsule.capsule_count = count;
efi_data(op).u.update_capsule.sg_list = sg_list;
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
return efi_data(op).status;
......@@ -252,7 +252,7 @@ static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
capsules);
efi_data(op).u.query_capsule_capabilities.capsule_count = count;
if (HYPERVISOR_dom0_op(&op) < 0)
if (HYPERVISOR_platform_op(&op) < 0)
return EFI_UNSUPPORTED;
*max_size = efi_data(op).u.query_capsule_capabilities.max_capsule_size;
......@@ -331,7 +331,7 @@ efi_system_table_t __init *xen_efi_probe(void)
};
union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info;
if (!xen_initial_domain() || HYPERVISOR_dom0_op(&op) < 0)
if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0)
return NULL;
/* Here we know that Xen runs on EFI platform. */
......@@ -347,7 +347,7 @@ efi_system_table_t __init *xen_efi_probe(void)
info->vendor.bufsz = sizeof(vendor);
set_xen_guest_handle(info->vendor.name, vendor);
if (HYPERVISOR_dom0_op(&op) == 0) {
if (HYPERVISOR_platform_op(&op) == 0) {
efi_systab_xen.fw_vendor = __pa_symbol(vendor);
efi_systab_xen.fw_revision = info->vendor.revision;
} else
......@@ -357,14 +357,14 @@ efi_system_table_t __init *xen_efi_probe(void)
op.u.firmware_info.type = XEN_FW_EFI_INFO;
op.u.firmware_info.index = XEN_FW_EFI_VERSION;
if (HYPERVISOR_dom0_op(&op) == 0)
if (HYPERVISOR_platform_op(&op) == 0)
efi_systab_xen.hdr.revision = info->version;
op.cmd = XENPF_firmware_info;
op.u.firmware_info.type = XEN_FW_EFI_INFO;
op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION;
if (HYPERVISOR_dom0_op(&op) == 0)
if (HYPERVISOR_platform_op(&op) == 0)
efi.runtime_version = info->version;
return &efi_systab_xen;
......
......@@ -518,7 +518,7 @@ static void mn_release(struct mmu_notifier *mn,
mutex_unlock(&priv->lock);
}
static struct mmu_notifier_ops gntdev_mmu_ops = {
static const struct mmu_notifier_ops gntdev_mmu_ops = {
.release = mn_release,
.invalidate_page = mn_invl_page,
.invalidate_range_start = mn_invl_range_start,
......@@ -748,6 +748,206 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
return rc;
}
#define GNTDEV_COPY_BATCH 24
struct gntdev_copy_batch {
struct gnttab_copy ops[GNTDEV_COPY_BATCH];
struct page *pages[GNTDEV_COPY_BATCH];
s16 __user *status[GNTDEV_COPY_BATCH];
unsigned int nr_ops;
unsigned int nr_pages;
};
static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
bool writeable, unsigned long *gfn)
{
unsigned long addr = (unsigned long)virt;
struct page *page;
unsigned long xen_pfn;
int ret;
ret = get_user_pages_fast(addr, 1, writeable, &page);
if (ret < 0)
return ret;
batch->pages[batch->nr_pages++] = page;
xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
*gfn = pfn_to_gfn(xen_pfn);
return 0;
}
static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
unsigned int i;
for (i = 0; i < batch->nr_pages; i++)
put_page(batch->pages[i]);
batch->nr_pages = 0;
}
static int gntdev_copy(struct gntdev_copy_batch *batch)
{
unsigned int i;
gnttab_batch_copy(batch->ops, batch->nr_ops);
gntdev_put_pages(batch);
/*
* For each completed op, update the status if the op failed
* and all previous ops for the segment were successful.
*/
for (i = 0; i < batch->nr_ops; i++) {
s16 status = batch->ops[i].status;
s16 old_status;
if (status == GNTST_okay)
continue;
if (__get_user(old_status, batch->status[i]))
return -EFAULT;
if (old_status != GNTST_okay)
continue;
if (__put_user(status, batch->status[i]))
return -EFAULT;
}
batch->nr_ops = 0;
return 0;
}
static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
struct gntdev_grant_copy_segment *seg,
s16 __user *status)
{
uint16_t copied = 0;
/*
* Disallow local -> local copies since there is only space in
* batch->pages for one page per-op and this would be a very
* expensive memcpy().
*/
if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref)))
return -EINVAL;
/* Can't cross page if source/dest is a grant ref. */
if (seg->flags & GNTCOPY_source_gref) {
if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
return -EINVAL;
}
if (seg->flags & GNTCOPY_dest_gref) {
if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
return -EINVAL;
}
if (put_user(GNTST_okay, status))
return -EFAULT;
while (copied < seg->len) {
struct gnttab_copy *op;
void __user *virt;
size_t len, off;
unsigned long gfn;
int ret;
if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
ret = gntdev_copy(batch);
if (ret < 0)
return ret;
}
len = seg->len - copied;
op = &batch->ops[batch->nr_ops];
op->flags = 0;
if (seg->flags & GNTCOPY_source_gref) {
op->source.u.ref = seg->source.foreign.ref;
op->source.domid = seg->source.foreign.domid;
op->source.offset = seg->source.foreign.offset + copied;
op->flags |= GNTCOPY_source_gref;
} else {
virt = seg->source.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
ret = gntdev_get_page(batch, virt, false, &gfn);
if (ret < 0)
return ret;
op->source.u.gmfn = gfn;
op->source.domid = DOMID_SELF;
op->source.offset = off;
}
if (seg->flags & GNTCOPY_dest_gref) {
op->dest.u.ref = seg->dest.foreign.ref;
op->dest.domid = seg->dest.foreign.domid;
op->dest.offset = seg->dest.foreign.offset + copied;
op->flags |= GNTCOPY_dest_gref;
} else {
virt = seg->dest.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
ret = gntdev_get_page(batch, virt, true, &gfn);
if (ret < 0)
return ret;
op->dest.u.gmfn = gfn;
op->dest.domid = DOMID_SELF;
op->dest.offset = off;
}
op->len = len;
copied += len;
batch->status[batch->nr_ops] = status;
batch->nr_ops++;
}
return 0;
}
static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
{
struct ioctl_gntdev_grant_copy copy;
struct gntdev_copy_batch batch;
unsigned int i;
int ret = 0;
if (copy_from_user(&copy, u, sizeof(copy)))
return -EFAULT;
batch.nr_ops = 0;
batch.nr_pages = 0;
for (i = 0; i < copy.count; i++) {
struct gntdev_grant_copy_segment seg;
if (copy_from_user(&seg, &copy.segments[i], sizeof(seg))) {
ret = -EFAULT;
goto out;
}
ret = gntdev_grant_copy_seg(&batch, &seg, &copy.segments[i].status);
if (ret < 0)
goto out;
cond_resched();
}
if (batch.nr_ops)
ret = gntdev_copy(&batch);
return ret;
out:
gntdev_put_pages(&batch);
return ret;
}
static long gntdev_ioctl(struct file *flip,
unsigned int cmd, unsigned long arg)
{
......@@ -767,6 +967,9 @@ static long gntdev_ioctl(struct file *flip,
case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
return gntdev_ioctl_notify(priv, ptr);
case IOCTL_GNTDEV_GRANT_COPY:
return gntdev_ioctl_grant_copy(priv, ptr);
default:
pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
return -ENOIOCTLCMD;
......
......@@ -128,7 +128,7 @@ struct unmap_refs_callback_data {
int result;
};
static struct gnttab_ops *gnttab_interface;
static const struct gnttab_ops *gnttab_interface;
static int grant_table_version;
static int grefs_per_grant_frame;
......@@ -1013,7 +1013,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return rc;
}
static struct gnttab_ops gnttab_v1_ops = {
static const struct gnttab_ops gnttab_v1_ops = {
.map_frames = gnttab_map_frames_v1,
.unmap_frames = gnttab_unmap_frames_v1,
.update_entry = gnttab_update_entry_v1,
......
......@@ -78,7 +78,7 @@ static int xen_pcpu_down(uint32_t cpu_id)
.u.cpu_ol.cpuid = cpu_id,
};
return HYPERVISOR_dom0_op(&op);
return HYPERVISOR_platform_op(&op);
}
static int xen_pcpu_up(uint32_t cpu_id)
......@@ -89,7 +89,7 @@ static int xen_pcpu_up(uint32_t cpu_id)
.u.cpu_ol.cpuid = cpu_id,
};
return HYPERVISOR_dom0_op(&op);
return HYPERVISOR_platform_op(&op);
}
static ssize_t show_online(struct device *dev,
......@@ -277,7 +277,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
.u.pcpu_info.xen_cpuid = cpu,
};
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
return ret;
......@@ -364,7 +364,7 @@ int xen_pcpu_id(uint32_t acpi_id)
op.cmd = XENPF_get_cpuinfo;
while (cpu_id <= max_id) {
op.u.pcpu_info.xen_cpuid = cpu_id;
if (HYPERVISOR_dom0_op(&op)) {
if (HYPERVISOR_platform_op(&op)) {
cpu_id++;
continue;
}
......
/*
* Xen stolen ticks accounting.
*/
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/xen-ops.h>
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/* return an consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
{
u64 ret;
if (BITS_PER_LONG < 64) {
u32 *p32 = (u32 *)p;
u32 h, l, h2;
/*
* Read high then low, and then make sure high is
* still the same; this will only loop if low wraps
* and carries into high.
* XXX some clean way to make this endian-proof?
*/
do {
h = READ_ONCE(p32[1]);
l = READ_ONCE(p32[0]);
h2 = READ_ONCE(p32[1]);
} while(h2 != h);
ret = (((u64)h) << 32) | l;
} else
ret = READ_ONCE(*p);
return ret;
}
/*
* Runstate accounting
*/
void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
{
u64 state_time;
struct vcpu_runstate_info *state;
BUG_ON(preemptible());
state = this_cpu_ptr(&xen_runstate);
/*
* The runstate info is always updated by the hypervisor on
* the current CPU, so there's no need to use anything
* stronger than a compiler barrier when fetching it.
*/
do {
state_time = get64(&state->state_entry_time);
*res = READ_ONCE(*state);
} while (get64(&state->state_entry_time) != state_time);
}
/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}
void xen_setup_runstate_info(int cpu)
{
struct vcpu_register_runstate_memory_area area;
area.addr.v = &per_cpu(xen_runstate, cpu);
if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
cpu, &area))
BUG();
}
......@@ -206,7 +206,7 @@ static int xen_hotadd_cpu(struct acpi_processor *pr)
op.u.cpu_add.acpi_id = pr->acpi_id;
op.u.cpu_add.pxm = pxm;
cpu_id = HYPERVISOR_dom0_op(&op);
cpu_id = HYPERVISOR_platform_op(&op);
if (cpu_id < 0)
pr_err(PREFIX "Failed to hotadd CPU for acpi_id %d\n",
pr->acpi_id);
......
......@@ -36,7 +36,7 @@ static int xen_acpi_pad_idle_cpus(unsigned int idle_nums)
op.u.core_parking.type = XEN_CORE_PARKING_SET;
op.u.core_parking.idle_nums = idle_nums;
return HYPERVISOR_dom0_op(&op);
return HYPERVISOR_platform_op(&op);
}
static int xen_acpi_pad_idle_cpus_num(void)
......@@ -46,7 +46,7 @@ static int xen_acpi_pad_idle_cpus_num(void)
op.cmd = XENPF_core_parking;
op.u.core_parking.type = XEN_CORE_PARKING_GET;
return HYPERVISOR_dom0_op(&op)
return HYPERVISOR_platform_op(&op)
?: op.u.core_parking.idle_nums;
}
......
......@@ -116,7 +116,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
if (!no_hypercall)
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (!ret) {
pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id);
......@@ -244,7 +244,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
}
if (!no_hypercall)
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (!ret) {
struct acpi_processor_performance *perf;
......@@ -302,7 +302,7 @@ static unsigned int __init get_max_acpi_id(void)
info = &op.u.pcpu_info;
info->xen_cpuid = 0;
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
return NR_CPUS;
......@@ -310,7 +310,7 @@ static unsigned int __init get_max_acpi_id(void)
last_cpu = op.u.pcpu_info.max_present;
for (i = 0; i <= last_cpu; i++) {
info->xen_cpuid = i;
ret = HYPERVISOR_dom0_op(&op);
ret = HYPERVISOR_platform_op(&op);
if (ret)
continue;
max_acpi_id = max(info->acpi_id, max_acpi_id);
......
......@@ -31,7 +31,7 @@ static int xensyms_next_sym(struct xensyms *xs)
symnum = symdata->symnum;
ret = HYPERVISOR_dom0_op(&xs->op);
ret = HYPERVISOR_platform_op(&xs->op);
if (ret < 0)
return ret;
......@@ -50,7 +50,7 @@ static int xensyms_next_sym(struct xensyms *xs)
set_xen_guest_handle(symdata->name, xs->name);
symdata->symnum--; /* Rewind */
ret = HYPERVISOR_dom0_op(&xs->op);
ret = HYPERVISOR_platform_op(&xs->op);
if (ret < 0)
return ret;
}
......
......@@ -144,6 +144,56 @@ struct ioctl_gntdev_unmap_notify {
__u32 event_channel_port;
};
struct gntdev_grant_copy_segment {
union {
void __user *virt;
struct {
grant_ref_t ref;
__u16 offset;
domid_t domid;
} foreign;
} source, dest;
__u16 len;
__u16 flags; /* GNTCOPY_* */
__s16 status; /* GNTST_* */
};
/*
* Copy between grant references and local buffers.
*
* The copy is split into @count @segments, each of which can copy
* to/from one grant reference.
*
* Each segment is similar to struct gnttab_copy in the hypervisor ABI
* except the local buffer is specified using a virtual address
* (instead of a GFN and offset).
*
* The local buffer may cross a Xen page boundary -- the driver will
* split segments into multiple ops if required.
*
* Returns 0 if all segments have been processed and @status in each
* segment is valid. Note that one or more segments may have failed
* (status != GNTST_okay).
*
* If the driver had to split a segment into two or more ops, @status
* includes the status of the first failed op for that segment (or
* GNTST_okay if all ops were successful).
*
* If -1 is returned, the status of all segments is undefined.
*
* EINVAL: A segment has local buffers for both source and
* destination.
* EINVAL: A segment crosses the boundary of a foreign page.
* EFAULT: A segment's local buffer is not accessible.
*/
#define IOCTL_GNTDEV_GRANT_COPY \
_IOC(_IOC_NONE, 'G', 8, sizeof(struct ioctl_gntdev_grant_copy))
struct ioctl_gntdev_grant_copy {
unsigned int count;
struct gntdev_grant_copy_segment __user *segments;
};
/* Clear (set to zero) the byte specified by index */
#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
/* Send an interrupt on the indicated event channel */
......
......@@ -35,14 +35,23 @@
* Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
* 1 January, 1970 if the current system time was <system_time>.
*/
#define XENPF_settime 17
struct xenpf_settime {
#define XENPF_settime32 17
struct xenpf_settime32 {
/* IN variables. */
uint32_t secs;
uint32_t nsecs;
uint64_t system_time;
};
DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime32_t);
#define XENPF_settime64 62
struct xenpf_settime64 {
/* IN variables. */
uint64_t secs;
uint32_t nsecs;
uint32_t mbz;
uint64_t system_time;
};
DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime64_t);
/*
* Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
......@@ -495,7 +504,8 @@ struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
union {
struct xenpf_settime settime;
struct xenpf_settime32 settime32;
struct xenpf_settime64 settime64;
struct xenpf_add_memtype add_memtype;
struct xenpf_del_memtype del_memtype;
struct xenpf_read_memtype read_memtype;
......
......@@ -48,7 +48,7 @@
#define __HYPERVISOR_set_callbacks 4
#define __HYPERVISOR_fpu_taskswitch 5
#define __HYPERVISOR_sched_op_compat 6
#define __HYPERVISOR_dom0_op 7
#define __HYPERVISOR_platform_op 7
#define __HYPERVISOR_set_debugreg 8
#define __HYPERVISOR_get_debugreg 9
#define __HYPERVISOR_update_descriptor 10
......
......@@ -5,6 +5,7 @@
#include <linux/notifier.h>
#include <linux/efi.h>
#include <asm/xen/interface.h>
#include <xen/interface/vcpu.h>
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
......@@ -18,6 +19,10 @@ void xen_arch_suspend(void);
void xen_resume_notifier_register(struct notifier_block *nb);
void xen_resume_notifier_unregister(struct notifier_block *nb);
bool xen_vcpu_stolen(int vcpu);
void xen_setup_runstate_info(int cpu);
void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
int xen_setup_shutdown_event(void);
extern unsigned long *xen_contiguous_bitmap;
......
......@@ -5,6 +5,9 @@
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册