提交 c61264f9 编写于 作者: L Linus Torvalds

Merge branch 'upstream/xen-tracing2' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen

* 'upstream/xen-tracing2' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen/trace: use class for multicall trace
  xen/trace: convert mmu events to use DECLARE_EVENT_CLASS()/DEFINE_EVENT()
  xen/multicall: move *idx fields to start of mc_buffer
  xen/multicall: special-case singleton hypercalls
  xen/multicalls: add unlikely around slowpath in __xen_mc_entry()
  xen/multicalls: disable MC_DEBUG
  xen/mmu: tune pgtable alloc/release
  xen/mmu: use extend_args for more mmuext updates
  xen/trace: add tlb flush tracepoints
  xen/trace: add segment desc tracing
  xen/trace: add xen_pgd_(un)pin tracepoints
  xen/trace: add ptpage alloc/release tracepoints
  xen/trace: add mmu tracepoints
  xen/trace: add multicall tracing
  xen/trace: set up tracepoint skeleton
  xen/multicalls: remove debugfs stats
  trace/xen: add skeleton for Xen trace events
......@@ -39,6 +39,8 @@
#include <linux/string.h>
#include <linux/types.h>
#include <trace/events/xen.h>
#include <asm/page.h>
#include <asm/pgtable.h>
......@@ -459,6 +461,8 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
mcl->op = __HYPERVISOR_fpu_taskswitch;
mcl->args[0] = set;
trace_xen_mc_entry(mcl, 1);
}
static inline void
......@@ -475,6 +479,8 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
mcl->args[2] = new_val.pte >> 32;
mcl->args[3] = flags;
}
trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4);
}
static inline void
......@@ -485,6 +491,8 @@ MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
mcl->args[0] = cmd;
mcl->args[1] = (unsigned long)uop;
mcl->args[2] = count;
trace_xen_mc_entry(mcl, 3);
}
static inline void
......@@ -504,6 +512,8 @@ MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long v
mcl->args[3] = flags;
mcl->args[4] = domid;
}
trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 4 : 5);
}
static inline void
......@@ -520,6 +530,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
mcl->args[2] = desc.a;
mcl->args[3] = desc.b;
}
trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
}
static inline void
......@@ -528,6 +540,8 @@ MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg)
mcl->op = __HYPERVISOR_memory_op;
mcl->args[0] = cmd;
mcl->args[1] = (unsigned long)arg;
trace_xen_mc_entry(mcl, 2);
}
static inline void
......@@ -539,6 +553,8 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
......@@ -550,6 +566,8 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
......@@ -558,6 +576,8 @@ MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
mcl->op = __HYPERVISOR_set_gdt;
mcl->args[0] = (unsigned long)frames;
mcl->args[1] = entries;
trace_xen_mc_entry(mcl, 2);
}
static inline void
......@@ -567,6 +587,8 @@ MULTI_stack_switch(struct multicall_entry *mcl,
mcl->op = __HYPERVISOR_stack_switch;
mcl->args[0] = ss;
mcl->args[1] = esp;
trace_xen_mc_entry(mcl, 2);
}
#endif /* _ASM_X86_XEN_HYPERCALL_H */
/*
 * Types shared between the Xen multicall code and the Xen trace event
 * definitions, so both can name the same enums and callback signature.
 */
#ifndef _ASM_XEN_TRACE_TYPES_H
#define _ASM_XEN_TRACE_TYPES_H
/* Why a multicall batch was flushed; reported by the xen_mc_flush_reason event. */
enum xen_mc_flush_reason {
XEN_MC_FL_NONE, /* explicit flush */
XEN_MC_FL_BATCH, /* out of hypercall space */
XEN_MC_FL_ARGS, /* out of argument space */
XEN_MC_FL_CALLBACK, /* out of callback space */
};
/*
 * Outcome of an attempt to extend the argument area of the most recently
 * queued multicall; reported by the xen_mc_extend_args event.
 */
enum xen_mc_extend_args {
XEN_MC_XE_OK,
XEN_MC_XE_BAD_OP,
XEN_MC_XE_NO_SPACE
};
/* Signature of a function queued to run with one opaque data pointer. */
typedef void (*xen_mc_callback_fn_t)(void *);
#endif /* _ASM_XEN_TRACE_TYPES_H */
......@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
# Objects that are always built; trace.o is the object added by this change.
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
	time.o xen-asm.o xen-asm_$(BITS).o \
	grant-table.o suspend.o platform-pci-unplug.o \
	p2m.o trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
......
......@@ -341,6 +341,8 @@ static void xen_set_ldt(const void *addr, unsigned entries)
struct mmuext_op *op;
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
trace_xen_cpu_set_ldt(addr, entries);
op = mcs.args;
op->cmd = MMUEXT_SET_LDT;
op->arg1.linear_addr = (unsigned long)addr;
......@@ -496,6 +498,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
u64 entry = *(u64 *)ptr;
trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
preempt_disable();
xen_mc_flush();
......@@ -565,6 +569,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
unsigned long p = (unsigned long)&dt[entrynum];
unsigned long start, end;
trace_xen_cpu_write_idt_entry(dt, entrynum, g);
preempt_disable();
start = __this_cpu_read(idt_desc.address);
......@@ -619,6 +625,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
trace_xen_cpu_load_idt(desc);
spin_lock(&lock);
__get_cpu_var(idt_desc) = *desc;
......@@ -637,6 +645,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
const void *desc, int type)
{
trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
preempt_disable();
switch (type) {
......@@ -665,6 +675,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
const void *desc, int type)
{
trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
switch (type) {
case DESC_LDT:
case DESC_TSS:
......@@ -684,7 +696,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
/*
 * Queue a stack_switch hypercall selecting __KERNEL_DS and thread->sp0 as
 * the new kernel stack, then hand the batch to xen_mc_issue() with the
 * lazy-CPU mode mask.
 *
 * @tss:    not used by this implementation.
 * @thread: supplies the sp0 value passed to the hypercall.
 */
static void xen_load_sp0(struct tss_struct *tss,
			 struct thread_struct *thread)
{
	struct multicall_space mcs;

	/* The call carries everything in its register args: no extra space. */
	mcs = xen_mc_entry(0);
	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}
......
......@@ -48,6 +48,8 @@
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <trace/events/xen.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
......@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
struct multicall_space mcs;
struct mmu_update *u;
trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
mcs = xen_mc_entry(sizeof(*u));
u = mcs.args;
......@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
*u = *update;
}
/*
 * Append one mmuext operation to the current multicall batch.
 *
 * First try to grow the argument area of the most recently queued
 * mmuext_op hypercall; when that works only its op count (args[1]) needs
 * bumping.  Otherwise queue a fresh mmuext_op hypercall carrying a single
 * operation.  Either way *op is copied into the space that was handed back.
 */
static void xen_extend_mmuext_op(const struct mmuext_op *op)
{
	struct multicall_space mcs;
	struct mmuext_op *slot;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*slot));

	if (mcs.mc == NULL) {
		/* Nothing to extend: start a new hypercall with count 1. */
		mcs = __xen_mc_entry(sizeof(*slot));
		MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	} else {
		/* Piggy-back on the previous hypercall: one more op. */
		mcs.mc->args[1]++;
	}

	slot = mcs.args;
	*slot = *op;
}
static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
struct mmu_update u;
......@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
trace_xen_mmu_set_pmd(ptr, val);
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
......@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
return true;
}
/*
 * Untraced PTE write: attempt a batched update first and fall back to a
 * direct native write when xen_batched_set_pte() returns false.
 */
static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	if (!xen_batched_set_pte(ptep, pteval))
		native_set_pte(ptep, pteval);
}

/* Set a PTE, emitting the xen_mmu_set_pte tracepoint before the write. */
static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

/*
 * Set a PTE for a given mm and virtual address.
 *
 * Emits the xen_mmu_set_pte_at tracepoint and then uses the untraced
 * helper, so the update is performed once and reported by exactly one
 * tracepoint.
 */
static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
	__xen_set_pte(ptep, pteval);
}
/*
 * Begin a protection-modification transaction on a PTE.
 *
 * Emits the xen_mmu_ptep_modify_prot_start tracepoint with the current
 * entry and returns that entry unchanged; the PTE itself is not modified
 * here.
 */
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
/* Just return the pte as-is. We preserve the bits on commit */
trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
return *ptep;
}
......@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
{
struct mmu_update u;
trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
xen_mc_batch();
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
......@@ -530,6 +563,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
static void xen_set_pud(pud_t *ptr, pud_t val)
{
trace_xen_mmu_set_pud(ptr, val);
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
......@@ -543,17 +578,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
#ifdef CONFIG_X86_PAE
/*
 * Trace the update, then store the native value of pte into *ptep with a
 * single set_64bit() write.
 */
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	u64 *slot = (u64 *)ptep;

	trace_xen_mmu_set_pte_atomic(ptep, pte);

	set_64bit(slot, native_pte_val(pte));
}
/*
 * Trace the clear, then zero the PTE: through the batched path when
 * xen_batched_set_pte() accepts it, otherwise with native_pte_clear().
 */
static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	trace_xen_mmu_pte_clear(mm, addr, ptep);

	if (xen_batched_set_pte(ptep, native_make_pte(0)))
		return;

	native_pte_clear(mm, addr, ptep);
}
/* Trace the clear, then write an all-zero entry through set_pmd(). */
static void xen_pmd_clear(pmd_t *pmdp)
{
	trace_xen_mmu_pmd_clear(pmdp);

	set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
......@@ -629,6 +667,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
trace_xen_mmu_set_pgd(ptr, user_ptr, val);
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
......@@ -788,14 +828,12 @@ static void xen_pte_unlock(void *v)
/*
 * Queue an mmuext operation on a page frame.
 *
 * @level: stored as the mmuext command; callers in this file pass values
 *         such as MMUEXT_UNPIN_TABLE.
 * @pfn:   page frame number; converted to a machine frame with
 *         pfn_to_mfn() before being queued.
 *
 * The operation is built on the stack and handed to
 * xen_extend_mmuext_op(), which decides whether it can be merged into the
 * previously queued mmuext_op hypercall or needs a new one.
 */
static void xen_do_pin(unsigned level, unsigned long pfn)
{
	struct mmuext_op op;

	op.cmd = level;
	op.arg1.mfn = pfn_to_mfn(pfn);

	xen_extend_mmuext_op(&op);
}
static int xen_pin_page(struct mm_struct *mm, struct page *page,
......@@ -863,6 +901,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
trace_xen_mmu_pgd_pin(mm, pgd);
xen_mc_batch();
if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
......@@ -988,6 +1028,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
/* Release a pagetables pages back as normal RW */
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
trace_xen_mmu_pgd_unpin(mm, pgd);
xen_mc_batch();
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
......@@ -1196,6 +1238,8 @@ static void xen_flush_tlb(void)
struct mmuext_op *op;
struct multicall_space mcs;
trace_xen_mmu_flush_tlb(0);
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
......@@ -1214,6 +1258,8 @@ static void xen_flush_tlb_single(unsigned long addr)
struct mmuext_op *op;
struct multicall_space mcs;
trace_xen_mmu_flush_tlb_single(addr);
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
......@@ -1240,6 +1286,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
trace_xen_mmu_flush_tlb_others(cpus, mm, va);
if (cpumask_empty(cpus))
return; /* nothing to do */
......@@ -1275,10 +1323,11 @@ static void set_current_cr3(void *v)
static void __xen_write_cr3(bool kernel, unsigned long cr3)
{
struct mmuext_op *op;
struct multicall_space mcs;
struct mmuext_op op;
unsigned long mfn;
trace_xen_mmu_write_cr3(kernel, cr3);
if (cr3)
mfn = pfn_to_mfn(PFN_DOWN(cr3));
else
......@@ -1286,13 +1335,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
WARN_ON(mfn == 0 && kernel);
mcs = __xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
op->arg1.mfn = mfn;
op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
op.arg1.mfn = mfn;
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
xen_extend_mmuext_op(&op);
if (kernel) {
percpu_write(xen_cr3, cr3);
......@@ -1451,19 +1497,52 @@ static void __init xen_release_pmd_init(unsigned long pfn)
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct multicall_space mcs;
struct mmuext_op *op;
mcs = __xen_mc_entry(sizeof(*op));
op = mcs.args;
op->cmd = cmd;
op->arg1.mfn = pfn_to_mfn(pfn);
MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
}
/*
 * Queue an update_va_mapping hypercall that remaps the kernel virtual
 * address of pfn (as given by __va()) with protection prot and flags 0.
 *
 * Like the other double-underscore helpers here it only queues the entry;
 * the caller decides when the batch is issued.
 */
static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
{
	unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
	struct multicall_space mcs = __xen_mc_entry(0);

	MULTI_update_va_mapping(mcs.mc, va, pfn_pte(pfn, prot), 0);
}
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
unsigned level)
{
bool pinned = PagePinned(virt_to_page(mm->pgd));
trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
if (pinned) {
struct page *page = pfn_to_page(pfn);
if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
xen_mc_batch();
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
xen_mc_issue(PARAVIRT_LAZY_MMU);
} else {
/* make sure there are no stray mappings of
this page */
......@@ -1483,15 +1562,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
}
/* This should never happen until we're OK to use struct page */
static void xen_release_ptpage(unsigned long pfn, unsigned level)
static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
bool pinned = PagePinned(page);
trace_xen_mmu_release_ptpage(pfn, level, pinned);
if (PagePinned(page)) {
if (pinned) {
if (!PageHighMem(page)) {
xen_mc_batch();
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
__set_pfn_prot(pfn, PAGE_KERNEL);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
ClearPagePinned(page);
}
......
......@@ -30,12 +30,13 @@
#define MC_BATCH 32
#define MC_DEBUG 1
#define MC_DEBUG 0
#define MC_ARGS (MC_BATCH * 16)
struct mc_buffer {
unsigned mcidx, argidx, cbidx;
struct multicall_entry entries[MC_BATCH];
#if MC_DEBUG
struct multicall_entry debug[MC_BATCH];
......@@ -46,85 +47,15 @@ struct mc_buffer {
void (*fn)(void *);
void *data;
} callbacks[MC_BATCH];
unsigned mcidx, argidx, cbidx;
};
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
/* flush reasons 0- slots, 1- args, 2- callbacks */
enum flush_reasons
{
FL_SLOTS,
FL_ARGS,
FL_CALLBACKS,
FL_N_REASONS
};
#ifdef CONFIG_XEN_DEBUG_FS
#define NHYPERCALLS 40 /* not really */
static struct {
unsigned histo[MC_BATCH+1];
unsigned issued;
unsigned arg_total;
unsigned hypercalls;
unsigned histo_hypercalls[NHYPERCALLS];
unsigned flush[FL_N_REASONS];
} mc_stats;
static u8 zero_stats;
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&mc_stats, 0, sizeof(mc_stats));
zero_stats = 0;
}
}
static void mc_add_stats(const struct mc_buffer *mc)
{
int i;
check_zero();
mc_stats.issued++;
mc_stats.hypercalls += mc->mcidx;
mc_stats.arg_total += mc->argidx;
mc_stats.histo[mc->mcidx]++;
for(i = 0; i < mc->mcidx; i++) {
unsigned op = mc->entries[i].op;
if (op < NHYPERCALLS)
mc_stats.histo_hypercalls[op]++;
}
}
static void mc_stats_flush(enum flush_reasons idx)
{
check_zero();
mc_stats.flush[idx]++;
}
#else /* !CONFIG_XEN_DEBUG_FS */
static inline void mc_add_stats(const struct mc_buffer *mc)
{
}
static inline void mc_stats_flush(enum flush_reasons idx)
{
}
#endif /* CONFIG_XEN_DEBUG_FS */
void xen_mc_flush(void)
{
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
struct multicall_entry *mc;
int ret = 0;
unsigned long flags;
int i;
......@@ -135,9 +66,26 @@ void xen_mc_flush(void)
something in the middle */
local_irq_save(flags);
mc_add_stats(b);
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
switch (b->mcidx) {
case 0:
/* no-op */
BUG_ON(b->argidx != 0);
break;
case 1:
/* Singleton multicall - bypass multicall machinery
and just do the call directly. */
mc = &b->entries[0];
if (b->mcidx) {
mc->result = privcmd_call(mc->op,
mc->args[0], mc->args[1], mc->args[2],
mc->args[3], mc->args[4]);
ret = mc->result < 0;
break;
default:
#if MC_DEBUG
memcpy(b->debug, b->entries,
b->mcidx * sizeof(struct multicall_entry));
......@@ -164,11 +112,10 @@ void xen_mc_flush(void)
}
}
#endif
}
b->mcidx = 0;
b->argidx = 0;
} else
BUG_ON(b->argidx != 0);
for (i = 0; i < b->cbidx; i++) {
struct callback *cb = &b->callbacks[i];
......@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args)
struct multicall_space ret;
unsigned argidx = roundup(b->argidx, sizeof(u64));
trace_xen_mc_entry_alloc(args);
BUG_ON(preemptible());
BUG_ON(b->argidx >= MC_ARGS);
if (b->mcidx == MC_BATCH ||
(argidx + args) >= MC_ARGS) {
mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
if (unlikely(b->mcidx == MC_BATCH ||
(argidx + args) >= MC_ARGS)) {
trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ?
XEN_MC_FL_BATCH : XEN_MC_FL_ARGS);
xen_mc_flush();
argidx = roundup(b->argidx, sizeof(u64));
}
ret.mc = &b->entries[b->mcidx];
#ifdef MC_DEBUG
#if MC_DEBUG
b->caller[b->mcidx] = __builtin_return_address(0);
#endif
b->mcidx++;
......@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
BUG_ON(preemptible());
BUG_ON(b->argidx >= MC_ARGS);
if (b->mcidx == 0)
return ret;
if (b->entries[b->mcidx - 1].op != op)
return ret;
if (unlikely(b->mcidx == 0 ||
b->entries[b->mcidx - 1].op != op)) {
trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP);
goto out;
}
if ((b->argidx + size) >= MC_ARGS)
return ret;
if (unlikely((b->argidx + size) >= MC_ARGS)) {
trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE);
goto out;
}
ret.mc = &b->entries[b->mcidx - 1];
ret.args = &b->args[b->argidx];
b->argidx += size;
BUG_ON(b->argidx >= MC_ARGS);
trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK);
out:
return ret;
}
......@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data)
struct callback *cb;
if (b->cbidx == MC_BATCH) {
mc_stats_flush(FL_CALLBACKS);
trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK);
xen_mc_flush();
}
trace_xen_mc_callback(fn, data);
cb = &b->callbacks[b->cbidx++];
cb->fn = fn;
cb->data = data;
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mc_debug;
static int __init xen_mc_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mc_debug = debugfs_create_dir("multicalls", d_xen);
debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
mc_stats.histo, MC_BATCH);
xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
mc_stats.histo_hypercalls, NHYPERCALLS);
xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
mc_stats.flush, FL_N_REASONS);
return 0;
}
fs_initcall(xen_mc_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */
#ifndef _XEN_MULTICALLS_H
#define _XEN_MULTICALLS_H
#include <trace/events/xen.h>
#include "xen-ops.h"
/* Multicalls */
......@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
/*
 * Start a multicall batch: disable local interrupts, emit the xen_mc_batch
 * tracepoint with the current paravirt lazy mode, and stash the saved
 * interrupt flags in the per-cpu xen_mc_irq_flags variable.
 */
static inline void xen_mc_batch(void)
{
unsigned long flags;
/* need to disable interrupts until this entry is complete */
local_irq_save(flags);
trace_xen_mc_batch(paravirt_get_lazy_mode());
__this_cpu_write(xen_mc_irq_flags, flags);
}
......@@ -37,6 +41,8 @@ void xen_mc_flush(void);
/* Issue a multicall if we're not in a lazy mode */
static inline void xen_mc_issue(unsigned mode)
{
trace_xen_mc_issue(mode);
if ((paravirt_get_lazy_mode() & mode) == 0)
xen_mc_flush();
......
#include <linux/ftrace.h>
/*
 * N(x) builds one designated initializer: the slot indexed by
 * __HYPERVISOR_x holds the string "(x)", parentheses included.
 */
#define N(x) [__HYPERVISOR_##x] = "("#x")"
/*
 * Hypercall number -> printable name.  Slots with no initializer below are
 * left NULL by the designated-initializer syntax.
 */
static const char *xen_hypercall_names[] = {
N(set_trap_table),
N(mmu_update),
N(set_gdt),
N(stack_switch),
N(set_callbacks),
N(fpu_taskswitch),
N(sched_op_compat),
N(dom0_op),
N(set_debugreg),
N(get_debugreg),
N(update_descriptor),
N(memory_op),
N(multicall),
N(update_va_mapping),
N(set_timer_op),
N(event_channel_op_compat),
N(xen_version),
N(console_io),
N(physdev_op_compat),
N(grant_table_op),
N(vm_assist),
N(update_va_mapping_otherdomain),
N(iret),
N(vcpu_op),
N(set_segment_base),
N(mmuext_op),
N(acm_op),
N(nmi_op),
N(sched_op),
N(callback_op),
N(xenoprof_op),
N(event_channel_op),
N(physdev_op),
N(hvm_op),
/* Architecture-specific hypercall definitions. */
N(arch_0),
N(arch_1),
N(arch_2),
N(arch_3),
N(arch_4),
N(arch_5),
N(arch_6),
N(arch_7),
};
/* The helper macro is only needed to build the table above. */
#undef N
/*
 * Map a hypercall number to its printable name.
 *
 * Returns the table entry for op, or an empty string when op is past the
 * end of the table or the slot has no name.  Never returns NULL.
 */
static const char *xen_hypercall_name(unsigned op)
{
	const char *name = NULL;

	if (op < ARRAY_SIZE(xen_hypercall_names))
		name = xen_hypercall_names[op];

	return name != NULL ? name : "";
}
#define CREATE_TRACE_POINTS
#include <trace/events/xen.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM xen
#if !defined(_TRACE_XEN_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_XEN_H
#include <linux/tracepoint.h>
#include <asm/paravirt_types.h>
#include <asm/xen/trace_types.h>
/* Multicalls */
/*
 * Event class recording a paravirt lazy mode.  The mode is printed as
 * LAZY_MMU, LAZY_CPU, or LAZY_NONE for any other value.
 */
DECLARE_EVENT_CLASS(xen_mc__batch,
TP_PROTO(enum paravirt_lazy_mode mode),
TP_ARGS(mode),
TP_STRUCT__entry(
__field(enum paravirt_lazy_mode, mode)
),
TP_fast_assign(__entry->mode = mode),
TP_printk("start batch LAZY_%s",
(__entry->mode == PARAVIRT_LAZY_MMU) ? "MMU" :
(__entry->mode == PARAVIRT_LAZY_CPU) ? "CPU" : "NONE")
);
/* Instantiate one event of the class above under the given name. */
#define DEFINE_XEN_MC_BATCH(name) \
DEFINE_EVENT(xen_mc__batch, name, \
TP_PROTO(enum paravirt_lazy_mode mode), \
TP_ARGS(mode))
/*
 * NOTE(review): both events share the class format string, so xen_mc_issue
 * records are also printed with the "start batch" prefix.
 */
DEFINE_XEN_MC_BATCH(xen_mc_batch);
DEFINE_XEN_MC_BATCH(xen_mc_issue);
/*
 * One multicall entry being filled in.  Records the op number, the number
 * of meaningful arguments, and a six-slot copy of the arguments: the first
 * nargs are copied from the entry and the remainder is zero-filled.
 * nargs is stored but not part of the printed output.
 * NOTE(review): nargs is not checked against the six-slot array here; the
 * callers visible in this change pass at most 5.
 */
TRACE_EVENT(xen_mc_entry,
TP_PROTO(struct multicall_entry *mc, unsigned nargs),
TP_ARGS(mc, nargs),
TP_STRUCT__entry(
__field(unsigned int, op)
__field(unsigned int, nargs)
__array(unsigned long, args, 6)
),
TP_fast_assign(__entry->op = mc->op;
__entry->nargs = nargs;
memcpy(__entry->args, mc->args, sizeof(unsigned long) * nargs);
memset(__entry->args + nargs, 0, sizeof(unsigned long) * (6 - nargs));
),
TP_printk("op %u%s args [%lx, %lx, %lx, %lx, %lx, %lx]",
__entry->op, xen_hypercall_name(__entry->op),
__entry->args[0], __entry->args[1], __entry->args[2],
__entry->args[3], __entry->args[4], __entry->args[5])
);
/* Allocation of a multicall entry; records the requested argument bytes. */
TRACE_EVENT(xen_mc_entry_alloc,
TP_PROTO(size_t args),
TP_ARGS(args),
TP_STRUCT__entry(
__field(size_t, args)
),
TP_fast_assign(__entry->args = args),
TP_printk("alloc entry %zu arg bytes", __entry->args)
);
/*
 * A callback being queued on the multicall buffer; records the function
 * pointer (printed with %pf) and its opaque data pointer.
 */
TRACE_EVENT(xen_mc_callback,
TP_PROTO(xen_mc_callback_fn_t fn, void *data),
TP_ARGS(fn, data),
TP_STRUCT__entry(
__field(xen_mc_callback_fn_t, fn)
__field(void *, data)
),
TP_fast_assign(
__entry->fn = fn;
__entry->data = data;
),
TP_printk("callback %pf, data %p",
__entry->fn, __entry->data)
);
/*
 * Why a flush is about to happen.  The reason is printed by name
 * (NONE/BATCH/ARGS/CALLBACK); any other value prints as "??".
 */
TRACE_EVENT(xen_mc_flush_reason,
TP_PROTO(enum xen_mc_flush_reason reason),
TP_ARGS(reason),
TP_STRUCT__entry(
__field(enum xen_mc_flush_reason, reason)
),
TP_fast_assign(__entry->reason = reason),
TP_printk("flush reason %s",
(__entry->reason == XEN_MC_FL_NONE) ? "NONE" :
(__entry->reason == XEN_MC_FL_BATCH) ? "BATCH" :
(__entry->reason == XEN_MC_FL_ARGS) ? "ARGS" :
(__entry->reason == XEN_MC_FL_CALLBACK) ? "CALLBACK" : "??")
);
/*
 * A multicall flush; records the three buffer indices at flush time:
 * queued hypercalls, argument bytes in use, and queued callbacks.
 */
TRACE_EVENT(xen_mc_flush,
TP_PROTO(unsigned mcidx, unsigned argidx, unsigned cbidx),
TP_ARGS(mcidx, argidx, cbidx),
TP_STRUCT__entry(
__field(unsigned, mcidx)
__field(unsigned, argidx)
__field(unsigned, cbidx)
),
TP_fast_assign(__entry->mcidx = mcidx;
__entry->argidx = argidx;
__entry->cbidx = cbidx),
TP_printk("flushing %u hypercalls, %u arg bytes, %u callbacks",
__entry->mcidx, __entry->argidx, __entry->cbidx)
);
/*
 * An attempt to extend the argument area of the last queued multicall.
 * Records the op (stored as unsigned int although passed as unsigned
 * long), the number of extra bytes requested, and the outcome, which is
 * printed as OK/BAD_OP/NO_SPACE or "???" for any other value.
 */
TRACE_EVENT(xen_mc_extend_args,
TP_PROTO(unsigned long op, size_t args, enum xen_mc_extend_args res),
TP_ARGS(op, args, res),
TP_STRUCT__entry(
__field(unsigned int, op)
__field(size_t, args)
__field(enum xen_mc_extend_args, res)
),
TP_fast_assign(__entry->op = op;
__entry->args = args;
__entry->res = res),
TP_printk("extending op %u%s by %zu bytes res %s",
__entry->op, xen_hypercall_name(__entry->op),
__entry->args,
__entry->res == XEN_MC_XE_OK ? "OK" :
__entry->res == XEN_MC_XE_BAD_OP ? "BAD_OP" :
__entry->res == XEN_MC_XE_NO_SPACE ? "NO_SPACE" : "???")
);
/* mmu */
/*
 * Event class for PTE writes.  Stores the PTE pointer and the raw .pte
 * member of the new value; the output shows the stored value both after
 * passing it through pte_val(native_make_pte()) and as stored ("raw").
 * The hex field width is derived from sizeof(pteval_t).
 */
DECLARE_EVENT_CLASS(xen_mmu__set_pte,
TP_PROTO(pte_t *ptep, pte_t pteval),
TP_ARGS(ptep, pteval),
TP_STRUCT__entry(
__field(pte_t *, ptep)
__field(pteval_t, pteval)
),
TP_fast_assign(__entry->ptep = ptep;
__entry->pteval = pteval.pte),
TP_printk("ptep %p pteval %0*llx (raw %0*llx)",
__entry->ptep,
(int)sizeof(pteval_t) * 2, (unsigned long long)pte_val(native_make_pte(__entry->pteval)),
(int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval)
);
/* Instantiate one event of the PTE-write class under the given name. */
#define DEFINE_XEN_MMU_SET_PTE(name) \
DEFINE_EVENT(xen_mmu__set_pte, name, \
TP_PROTO(pte_t *ptep, pte_t pteval), \
TP_ARGS(ptep, pteval))
DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte);
DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte_atomic);
/*
 * A PTE write issued with an explicit domain id.  Same fields and value
 * formatting as the plain PTE-write events, plus the domid.
 */
TRACE_EVENT(xen_mmu_set_domain_pte,
TP_PROTO(pte_t *ptep, pte_t pteval, unsigned domid),
TP_ARGS(ptep, pteval, domid),
TP_STRUCT__entry(
__field(pte_t *, ptep)
__field(pteval_t, pteval)
__field(unsigned, domid)
),
TP_fast_assign(__entry->ptep = ptep;
__entry->pteval = pteval.pte;
__entry->domid = domid),
TP_printk("ptep %p pteval %0*llx (raw %0*llx) domid %u",
__entry->ptep,
(int)sizeof(pteval_t) * 2, (unsigned long long)pte_val(native_make_pte(__entry->pteval)),
(int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval,
__entry->domid)
);
/*
 * A PTE write for a known mm and virtual address.  Records the mm, the
 * address, the PTE pointer and the raw new value; the value is printed
 * both through pte_val(native_make_pte()) and as stored.
 */
TRACE_EVENT(xen_mmu_set_pte_at,
TP_PROTO(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval),
TP_ARGS(mm, addr, ptep, pteval),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, addr)
__field(pte_t *, ptep)
__field(pteval_t, pteval)
),
TP_fast_assign(__entry->mm = mm;
__entry->addr = addr;
__entry->ptep = ptep;
__entry->pteval = pteval.pte),
TP_printk("mm %p addr %lx ptep %p pteval %0*llx (raw %0*llx)",
__entry->mm, __entry->addr, __entry->ptep,
(int)sizeof(pteval_t) * 2, (unsigned long long)pte_val(native_make_pte(__entry->pteval)),
(int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval)
);
/* A PTE being cleared; records the mm, the virtual address and the PTE pointer. */
TRACE_EVENT(xen_mmu_pte_clear,
TP_PROTO(struct mm_struct *mm, unsigned long addr, pte_t *ptep),
TP_ARGS(mm, addr, ptep),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, addr)
__field(pte_t *, ptep)
),
TP_fast_assign(__entry->mm = mm;
__entry->addr = addr;
__entry->ptep = ptep),
TP_printk("mm %p addr %lx ptep %p",
__entry->mm, __entry->addr, __entry->ptep)
);
/*
 * A PMD write.  Records the PMD pointer and the raw .pmd member of the new
 * value; printed both through pmd_val(native_make_pmd()) and as stored.
 */
TRACE_EVENT(xen_mmu_set_pmd,
TP_PROTO(pmd_t *pmdp, pmd_t pmdval),
TP_ARGS(pmdp, pmdval),
TP_STRUCT__entry(
__field(pmd_t *, pmdp)
__field(pmdval_t, pmdval)
),
TP_fast_assign(__entry->pmdp = pmdp;
__entry->pmdval = pmdval.pmd),
TP_printk("pmdp %p pmdval %0*llx (raw %0*llx)",
__entry->pmdp,
(int)sizeof(pmdval_t) * 2, (unsigned long long)pmd_val(native_make_pmd(__entry->pmdval)),
(int)sizeof(pmdval_t) * 2, (unsigned long long)__entry->pmdval)
);
/* A PMD being cleared; records only the PMD pointer. */
TRACE_EVENT(xen_mmu_pmd_clear,
TP_PROTO(pmd_t *pmdp),
TP_ARGS(pmdp),
TP_STRUCT__entry(
__field(pmd_t *, pmdp)
),
TP_fast_assign(__entry->pmdp = pmdp),
TP_printk("pmdp %p", __entry->pmdp)
);
/*
 * PUD/PGD events.  With four or more page-table levels, set_pud, set_pgd
 * and pud_clear are all defined; otherwise only set_pud is defined, and it
 * formats the value through the pgd helpers instead of the pud helpers.
 */
#if PAGETABLE_LEVELS >= 4
/* A PUD write: pointer plus new value, printed via pud_val() and raw. */
TRACE_EVENT(xen_mmu_set_pud,
TP_PROTO(pud_t *pudp, pud_t pudval),
TP_ARGS(pudp, pudval),
TP_STRUCT__entry(
__field(pud_t *, pudp)
__field(pudval_t, pudval)
),
TP_fast_assign(__entry->pudp = pudp;
__entry->pudval = native_pud_val(pudval)),
TP_printk("pudp %p pudval %0*llx (raw %0*llx)",
__entry->pudp,
(int)sizeof(pudval_t) * 2, (unsigned long long)pud_val(native_make_pud(__entry->pudval)),
(int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
);
/* A PGD write: kernel and user PGD pointers plus the new value. */
TRACE_EVENT(xen_mmu_set_pgd,
TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval),
TP_ARGS(pgdp, user_pgdp, pgdval),
TP_STRUCT__entry(
__field(pgd_t *, pgdp)
__field(pgd_t *, user_pgdp)
__field(pgdval_t, pgdval)
),
TP_fast_assign(__entry->pgdp = pgdp;
__entry->user_pgdp = user_pgdp;
__entry->pgdval = pgdval.pgd),
TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)",
__entry->pgdp, __entry->user_pgdp,
(int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)),
(int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval)
);
/* A PUD being cleared; records only the PUD pointer. */
TRACE_EVENT(xen_mmu_pud_clear,
TP_PROTO(pud_t *pudp),
TP_ARGS(pudp),
TP_STRUCT__entry(
__field(pud_t *, pudp)
),
TP_fast_assign(__entry->pudp = pudp),
TP_printk("pudp %p", __entry->pudp)
);
#else
/* A PUD write on configurations with fewer levels; value printed via pgd_val(). */
TRACE_EVENT(xen_mmu_set_pud,
TP_PROTO(pud_t *pudp, pud_t pudval),
TP_ARGS(pudp, pudval),
TP_STRUCT__entry(
__field(pud_t *, pudp)
__field(pudval_t, pudval)
),
TP_fast_assign(__entry->pudp = pudp;
__entry->pudval = native_pud_val(pudval)),
TP_printk("pudp %p pudval %0*llx (raw %0*llx)",
__entry->pudp,
(int)sizeof(pudval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pudval)),
(int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
);
#endif
/* A PGD entry being cleared; records only the PGD pointer. */
TRACE_EVENT(xen_mmu_pgd_clear,
TP_PROTO(pgd_t *pgdp),
TP_ARGS(pgdp),
TP_STRUCT__entry(
__field(pgd_t *, pgdp)
),
TP_fast_assign(__entry->pgdp = pgdp),
TP_printk("pgdp %p", __entry->pgdp)
);
/*
 * Event class shared by the start and commit halves of a PTE protection
 * change.  Records the mm, the virtual address, the PTE pointer and the
 * raw PTE value, printed both through pte_val(native_make_pte()) and as
 * stored.
 */
DECLARE_EVENT_CLASS(xen_mmu_ptep_modify_prot,
TP_PROTO(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval),
TP_ARGS(mm, addr, ptep, pteval),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, addr)
__field(pte_t *, ptep)
__field(pteval_t, pteval)
),
TP_fast_assign(__entry->mm = mm;
__entry->addr = addr;
__entry->ptep = ptep;
__entry->pteval = pteval.pte),
TP_printk("mm %p addr %lx ptep %p pteval %0*llx (raw %0*llx)",
__entry->mm, __entry->addr, __entry->ptep,
(int)sizeof(pteval_t) * 2, (unsigned long long)pte_val(native_make_pte(__entry->pteval)),
(int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval)
);
/* Instantiate one event of the protection-change class under the given name. */
#define DEFINE_XEN_MMU_PTEP_MODIFY_PROT(name) \
DEFINE_EVENT(xen_mmu_ptep_modify_prot, name, \
TP_PROTO(struct mm_struct *mm, unsigned long addr, \
pte_t *ptep, pte_t pteval), \
TP_ARGS(mm, addr, ptep, pteval))
DEFINE_XEN_MMU_PTEP_MODIFY_PROT(xen_mmu_ptep_modify_prot_start);
DEFINE_XEN_MMU_PTEP_MODIFY_PROT(xen_mmu_ptep_modify_prot_commit);
/*
 * A page-table page being allocated.  Records the owning mm, the page
 * frame number, the page-table level, and whether the caller found the
 * pagetable pinned; the last is printed as "pinned" or "unpinned".
 * The level field is unsigned and is therefore printed with %u.
 */
TRACE_EVENT(xen_mmu_alloc_ptpage,
	    TP_PROTO(struct mm_struct *mm, unsigned long pfn, unsigned level, bool pinned),
	    TP_ARGS(mm, pfn, level, pinned),
	    TP_STRUCT__entry(
		    __field(struct mm_struct *, mm)
		    __field(unsigned long, pfn)
		    __field(unsigned, level)
		    __field(bool, pinned)
		    ),
	    TP_fast_assign(__entry->mm = mm;
			   __entry->pfn = pfn;
			   __entry->level = level;
			   __entry->pinned = pinned),
	    TP_printk("mm %p pfn %lx level %u %spinned",
		      __entry->mm, __entry->pfn, __entry->level,
		      __entry->pinned ? "" : "un")
	);
/*
 * A page-table page being released.  Records the page frame number, the
 * page-table level, and whether the page was pinned; the last is printed
 * as "pinned" or "unpinned".
 * The level field is unsigned and is therefore printed with %u.
 */
TRACE_EVENT(xen_mmu_release_ptpage,
	    TP_PROTO(unsigned long pfn, unsigned level, bool pinned),
	    TP_ARGS(pfn, level, pinned),
	    TP_STRUCT__entry(
		    __field(unsigned long, pfn)
		    __field(unsigned, level)
		    __field(bool, pinned)
		    ),
	    TP_fast_assign(__entry->pfn = pfn;
			   __entry->level = level;
			   __entry->pinned = pinned),
	    TP_printk("pfn %lx level %u %spinned",
		      __entry->pfn, __entry->level,
		      __entry->pinned ? "" : "un")
	);
/*
 * Event class for whole-pagetable pin/unpin operations; records the mm and
 * the PGD pointer.
 */
DECLARE_EVENT_CLASS(xen_mmu_pgd,
TP_PROTO(struct mm_struct *mm, pgd_t *pgd),
TP_ARGS(mm, pgd),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(pgd_t *, pgd)
),
TP_fast_assign(__entry->mm = mm;
__entry->pgd = pgd),
TP_printk("mm %p pgd %p", __entry->mm, __entry->pgd)
);
/* Instantiate one event of the PGD class under the given name. */
#define DEFINE_XEN_MMU_PGD_EVENT(name) \
DEFINE_EVENT(xen_mmu_pgd, name, \
TP_PROTO(struct mm_struct *mm, pgd_t *pgd), \
TP_ARGS(mm, pgd))
DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin);
DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin);
/*
 * A full TLB flush.  The event carries no payload: the int argument is
 * evaluated and discarded, the record holds a zero-length array, and the
 * printed text is empty.
 */
TRACE_EVENT(xen_mmu_flush_tlb,
TP_PROTO(int x),
TP_ARGS(x),
TP_STRUCT__entry(__array(char, x, 0)),
TP_fast_assign((void)x),
TP_printk("%s", "")
);
/* A single-address TLB flush; records the address. */
TRACE_EVENT(xen_mmu_flush_tlb_single,
TP_PROTO(unsigned long addr),
TP_ARGS(addr),
TP_STRUCT__entry(
__field(unsigned long, addr)
),
TP_fast_assign(__entry->addr = addr),
TP_printk("addr %lx", __entry->addr)
);
/*
 * A TLB flush request aimed at other CPUs.  Rather than storing the whole
 * mask, the record keeps the number of CPUs set in it (cpumask_weight()),
 * together with the mm and the address.
 * The ncpus field is unsigned and is therefore printed with %u.
 */
TRACE_EVENT(xen_mmu_flush_tlb_others,
	    TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
		     unsigned long addr),
	    TP_ARGS(cpus, mm, addr),
	    TP_STRUCT__entry(
		    __field(unsigned, ncpus)
		    __field(struct mm_struct *, mm)
		    __field(unsigned long, addr)
		    ),
	    TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
			   __entry->mm = mm;
			   __entry->addr = addr),
	    TP_printk("ncpus %u mm %p addr %lx",
		      __entry->ncpus, __entry->mm, __entry->addr)
	);
/*
 * A cr3 write; records the cr3 value and whether it is the kernel or the
 * user base pointer (printed as "kernel" or "user").
 */
TRACE_EVENT(xen_mmu_write_cr3,
TP_PROTO(bool kernel, unsigned long cr3),
TP_ARGS(kernel, cr3),
TP_STRUCT__entry(
__field(bool, kernel)
__field(unsigned long, cr3)
),
TP_fast_assign(__entry->kernel = kernel;
__entry->cr3 = cr3),
TP_printk("%s cr3 %lx",
__entry->kernel ? "kernel" : "user", __entry->cr3)
);
/* CPU */
/*
 * An LDT entry write; records the table pointer, the entry index and the
 * 64-bit descriptor value.
 */
TRACE_EVENT(xen_cpu_write_ldt_entry,
TP_PROTO(struct desc_struct *dt, int entrynum, u64 desc),
TP_ARGS(dt, entrynum, desc),
TP_STRUCT__entry(
__field(struct desc_struct *, dt)
__field(int, entrynum)
__field(u64, desc)
),
TP_fast_assign(__entry->dt = dt;
__entry->entrynum = entrynum;
__entry->desc = desc;
),
TP_printk("dt %p entrynum %d entry %016llx",
__entry->dt, __entry->entrynum,
(unsigned long long)__entry->desc)
);
/*
 * An IDT entry write; records the table pointer and the entry index.  The
 * gate descriptor itself (ent) is accepted but not recorded.
 */
TRACE_EVENT(xen_cpu_write_idt_entry,
TP_PROTO(gate_desc *dt, int entrynum, const gate_desc *ent),
TP_ARGS(dt, entrynum, ent),
TP_STRUCT__entry(
__field(gate_desc *, dt)
__field(int, entrynum)
),
TP_fast_assign(__entry->dt = dt;
__entry->entrynum = entrynum;
),
TP_printk("dt %p entrynum %d",
__entry->dt, __entry->entrynum)
);
/* An IDT load; records the address member of the descriptor pointer. */
TRACE_EVENT(xen_cpu_load_idt,
TP_PROTO(const struct desc_ptr *desc),
TP_ARGS(desc),
TP_STRUCT__entry(
__field(unsigned long, addr)
),
TP_fast_assign(__entry->addr = desc->address),
TP_printk("addr %lx", __entry->addr)
);
/*
 * A GDT entry write; records the table pointer, the entry index, the
 * descriptor type and the first 64 bits read from the descriptor.
 * The descriptor is read through a const-qualified pointer so that the
 * const on the incoming argument is not cast away.
 */
TRACE_EVENT(xen_cpu_write_gdt_entry,
	    TP_PROTO(struct desc_struct *dt, int entrynum, const void *desc, int type),
	    TP_ARGS(dt, entrynum, desc, type),
	    TP_STRUCT__entry(
		    __field(u64, desc)
		    __field(struct desc_struct *, dt)
		    __field(int, entrynum)
		    __field(int, type)
		    ),
	    TP_fast_assign(__entry->dt = dt;
			   __entry->entrynum = entrynum;
			   __entry->desc = *(const u64 *)desc;
			   __entry->type = type;
		    ),
	    TP_printk("dt %p entrynum %d type %d desc %016llx",
		      __entry->dt, __entry->entrynum, __entry->type,
		      (unsigned long long)__entry->desc)
	);
/* An LDT being installed; records its address and number of entries. */
TRACE_EVENT(xen_cpu_set_ldt,
TP_PROTO(const void *addr, unsigned entries),
TP_ARGS(addr, entries),
TP_STRUCT__entry(
__field(const void *, addr)
__field(unsigned, entries)
),
TP_fast_assign(__entry->addr = addr;
__entry->entries = entries),
TP_printk("addr %p entries %u",
__entry->addr, __entry->entries)
);
#endif /* _TRACE_XEN_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册