提交 9f888b3a 编写于 作者: L Linus Torvalds

Merge tag 'stable/for-linus-3.16-rc0-tag' of...

Merge tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into next

Pull Xen updates from David Vrabel:
 "xen: features and fixes for 3.16-rc0
   - support foreign mappings in PVH domains (needed when dom0 is PVH)

   - fix mapping high MMIO regions in x86 PV guests (this is also the
     first half of removing the PAGE_IOMAP PTE flag).

   - ARM suspend/resume support.

   - ARM multicall support"

* tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  x86/xen: map foreign pfns for autotranslated guests
  xen-acpi-processor: Don't display errors when we get -ENOSYS
  xen/pciback: Document the entry points for 'pcistub_put_pci_dev'
  xen/pciback: Document when the 'unbind' and 'bind' functions are called.
  xen-pciback: Document when we FLR a PCI device.
  xen-pciback: First reset, then free.
  xen-pciback: Cleanup up pcistub_put_pci_dev
  x86/xen: do not use _PAGE_IOMAP in xen_remap_domain_mfn_range()
  x86/xen: set regions above the end of RAM as 1:1
  x86/xen: only warn once if bad MFNs are found during setup
  x86/xen: compactly store large identity ranges in the p2m
  x86/xen: fix set_phys_range_identity() if pfn_e > MAX_P2M_PFN
  x86/xen: rename early_p2m_alloc() and early_p2m_alloc_middle()
  xen/x86: set panic notifier priority to minimum
  arm,arm64/xen: introduce HYPERVISOR_suspend()
  xen: refactor suspend pre/post hooks
  arm: xen: export HYPERVISOR_multicall to modules.
  arm64: introduce virt_to_pfn
  arm/xen: Remove definition of virt_to_pfn in asm/xen/page.h
  arm: xen: implement multicall hypercall support.
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define _ASM_ARM_XEN_HYPERCALL_H #define _ASM_ARM_XEN_HYPERCALL_H
#include <xen/interface/xen.h> #include <xen/interface/xen.h>
#include <xen/interface/sched.h>
long privcmd_call(unsigned call, unsigned long a1, long privcmd_call(unsigned call, unsigned long a1,
unsigned long a2, unsigned long a3, unsigned long a2, unsigned long a3,
...@@ -48,6 +49,16 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); ...@@ -48,6 +49,16 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg);
int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_tmem_op(void *arg);
int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr);
static inline int
HYPERVISOR_suspend(unsigned long start_info_mfn)
{
struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
/* start_info_mfn is unused on ARM */
return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
}
static inline void static inline void
MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
...@@ -63,9 +74,4 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, ...@@ -63,9 +74,4 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
BUG(); BUG();
} }
static inline int
HYPERVISOR_multicall(void *call_list, int nr_calls)
{
BUG();
}
#endif /* _ASM_ARM_XEN_HYPERCALL_H */ #endif /* _ASM_ARM_XEN_HYPERCALL_H */
...@@ -40,6 +40,8 @@ typedef uint64_t xen_pfn_t; ...@@ -40,6 +40,8 @@ typedef uint64_t xen_pfn_t;
#define PRI_xen_pfn "llx" #define PRI_xen_pfn "llx"
typedef uint64_t xen_ulong_t; typedef uint64_t xen_ulong_t;
#define PRI_xen_ulong "llx" #define PRI_xen_ulong "llx"
typedef int64_t xen_long_t;
#define PRI_xen_long "llx"
/* Guest handles for primitive C types. */ /* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int); __DEFINE_GUEST_HANDLE(uint, unsigned int);
......
...@@ -339,6 +339,14 @@ static int __init xen_pm_init(void) ...@@ -339,6 +339,14 @@ static int __init xen_pm_init(void)
} }
late_initcall(xen_pm_init); late_initcall(xen_pm_init);
/*
 * Empty stubs: ARM has no arch-specific work to do around Xen
 * suspend/resume, but the generic Xen suspend code references these
 * hooks (NOTE(review): presumably called from drivers/xen — confirm
 * against the shared suspend path).
 */
void xen_arch_pre_suspend(void) { }
void xen_arch_post_suspend(int suspend_cancelled) { }
void xen_timer_resume(void) { }
void xen_arch_resume(void) { }
/* In the hypervisor.S file. */ /* In the hypervisor.S file. */
EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
...@@ -350,4 +358,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); ...@@ -350,4 +358,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(privcmd_call); EXPORT_SYMBOL_GPL(privcmd_call);
...@@ -89,6 +89,7 @@ HYPERCALL2(memory_op); ...@@ -89,6 +89,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op); HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op); HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op); HYPERCALL1(tmem_op);
HYPERCALL2(multicall);
ENTRY(privcmd_call) ENTRY(privcmd_call)
stmdb sp!, {r4} stmdb sp!, {r4}
......
...@@ -80,6 +80,7 @@ HYPERCALL2(memory_op); ...@@ -80,6 +80,7 @@ HYPERCALL2(memory_op);
HYPERCALL2(physdev_op); HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op); HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op); HYPERCALL1(tmem_op);
HYPERCALL2(multicall);
ENTRY(privcmd_call) ENTRY(privcmd_call)
mov x16, x0 mov x16, x0
......
...@@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg) ...@@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg)
} }
static inline int static inline int
HYPERVISOR_multicall(void *call_list, int nr_calls) HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
{ {
return _hypercall2(int, multicall, call_list, nr_calls); return _hypercall2(int, multicall, call_list, nr_calls);
} }
......
...@@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t; ...@@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx" #define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t; typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx" #define PRI_xen_ulong "lx"
typedef long xen_long_t;
#define PRI_xen_long "lx"
/* Guest handles for primitive C types. */ /* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int); __DEFINE_GUEST_HANDLE(uint, unsigned int);
......
...@@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) ...@@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
static struct notifier_block xen_panic_block = { static struct notifier_block xen_panic_block = {
.notifier_call= xen_panic_event, .notifier_call= xen_panic_event,
.priority = INT_MIN
}; };
int xen_panic_handler_init(void) int xen_panic_handler_init(void)
......
...@@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void) ...@@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void)
} }
#endif #endif
#ifdef CONFIG_XEN_PVH
/*
* Map foreign gfn (fgfn), to local pfn (lpfn). This for the user
* space creating new guest on pvh dom0 and needing to map domU pages.
*/
static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn,
			    unsigned int domid)
{
	xen_pfn_t local_gpfn = lpfn;
	xen_ulong_t foreign_idx = fgfn;
	int map_err = 0;
	int rc;
	struct xen_add_to_physmap_range xatp = {
		.domid = DOMID_SELF,
		.foreign_domid = domid,
		.size = 1,
		.space = XENMAPSPACE_gmfn_foreign,
	};

	set_xen_guest_handle(xatp.idxs, &foreign_idx);
	set_xen_guest_handle(xatp.gpfns, &local_gpfn);
	set_xen_guest_handle(xatp.errs, &map_err);

	/* rc < 0: the hypercall itself failed; otherwise report the
	 * per-entry error Xen wrote through xatp.errs. */
	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
	if (rc < 0)
		return rc;
	return map_err;
}
/*
 * Remove 'count' p2m entries starting at local pfn 'spfn'.
 * Stops at the first hypercall failure and returns its error code;
 * returns 0 on success (or when count <= 0).
 */
static int xlate_remove_from_p2m(unsigned long spfn, int count)
{
	struct xen_remove_from_physmap xrp;
	int i, rc = 0;	/* was uninitialized: count <= 0 returned garbage */

	for (i = 0; i < count; i++) {
		xrp.domid = DOMID_SELF;
		xrp.gpfn = spfn + i;
		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
		if (rc)
			break;
	}
	return rc;
}
/*
 * Per-call state threaded through apply_to_page_range() by
 * xlate_remap_gfn_range() into xlate_map_pte_fn().
 */
struct xlate_remap_data {
unsigned long fgfn; /* foreign domain's gfn */
pgprot_t prot; /* protection applied to each new local PTE */
domid_t domid; /* foreign domain owning the gfn */
int index; /* next entry of 'pages' to consume */
struct page **pages; /* local pages backing the mapping */
};
/*
 * apply_to_page_range() callback: take the next local backing page,
 * register it in the hypervisor p2m, then install the local PTE.
 */
static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
			    void *data)
{
	struct xlate_remap_data *info = data;
	unsigned long local_pfn;
	pte_t pte;
	int ret;

	local_pfn = page_to_pfn(info->pages[info->index++]);
	pte = pte_mkspecial(pfn_pte(local_pfn, info->prot));

	/* Update the p2m first; only map locally if that succeeded. */
	ret = xlate_add_to_p2m(local_pfn, info->fgfn, info->domid);
	if (ret)
		return ret;

	native_set_pte(ptep, pte);
	return 0;
}
/*
 * Map 'nr' pages of foreign gfns (starting at 'mfn', owned by 'domid')
 * at 'addr' in 'vma', backed by the caller-supplied local 'pages'.
 */
static int xlate_remap_gfn_range(struct vm_area_struct *vma,
				 unsigned long addr, unsigned long mfn,
				 int nr, pgprot_t prot, unsigned domid,
				 struct page **pages)
{
	struct xlate_remap_data data = {
		.fgfn = mfn,
		.prot = prot,
		.domid = domid,
		.index = 0,
		.pages = pages,
	};
	int ret;

	BUG_ON(!pages);

	ret = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
				  xlate_map_pte_fn, &data);
	flush_tlb_all();
	return ret;
}
#endif
#define REMAP_BATCH_SIZE 16 #define REMAP_BATCH_SIZE 16
struct remap_data { struct remap_data {
...@@ -2522,7 +2611,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, ...@@ -2522,7 +2611,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data) unsigned long addr, void *data)
{ {
struct remap_data *rmd = data; struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte); rmd->mmu_update->val = pte_val_ma(pte);
...@@ -2544,13 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, ...@@ -2544,13 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long range; unsigned long range;
int err = 0; int err = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EINVAL;
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
return xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
domid, pages);
#else
return -EINVAL;
#endif
}
rmd.mfn = mfn; rmd.mfn = mfn;
rmd.prot = prot; rmd.prot = prot;
...@@ -2588,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, ...@@ -2588,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
return 0; return 0;
#ifdef CONFIG_XEN_PVH
while (numpgs--) {
/*
* The mmu has already cleaned up the process mmu
* resources at this point (lookup_address will return
* NULL).
*/
unsigned long pfn = page_to_pfn(pages[numpgs]);
xlate_remove_from_p2m(pfn, 1);
}
/*
* We don't need to flush tlbs because as part of
* xlate_remove_from_p2m, the hypervisor will do tlb flushes
* after removing the p2m entries from the EPT/NPT
*/
return 0;
#else
return -EINVAL; return -EINVAL;
#endif
} }
EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
* pfn_to_mfn(0xc0000)=0xc0000 * pfn_to_mfn(0xc0000)=0xc0000
* *
* The benefit of this is, that we can assume for non-RAM regions (think * The benefit of this is, that we can assume for non-RAM regions (think
* PCI BARs, or ACPI spaces), we can create mappings easily b/c we * PCI BARs, or ACPI spaces), we can create mappings easily because we
* get the PFN value to match the MFN. * get the PFN value to match the MFN.
* *
* For this to work efficiently we have one new page p2m_identity and * For this to work efficiently we have one new page p2m_identity and
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
* There is also a digram of the P2M at the end that can help. * There is also a digram of the P2M at the end that can help.
* Imagine your E820 looking as so: * Imagine your E820 looking as so:
* *
* 1GB 2GB * 1GB 2GB 4GB
* /-------------------+---------\/----\ /----------\ /---+-----\ * /-------------------+---------\/----\ /----------\ /---+-----\
* | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
* \-------------------+---------/\----/ \----------/ \---+-----/ * \-------------------+---------/\----/ \----------/ \---+-----/
...@@ -77,9 +77,8 @@ ...@@ -77,9 +77,8 @@
* of the PFN and the end PFN (263424 and 512256 respectively). The first step * of the PFN and the end PFN (263424 and 512256 respectively). The first step
* is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
* covers 512^2 of page estate (1GB) and in case the start or end PFN is not * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
* aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as
* to end pfn. We reserve_brk top leaf pages if they are missing (means they * required to split any existing p2m_mid_missing middle pages.
* point to p2m_mid_missing).
* *
* With the E820 example above, 263424 is not 1GB aligned so we allocate a * With the E820 example above, 263424 is not 1GB aligned so we allocate a
* reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
...@@ -88,7 +87,7 @@ ...@@ -88,7 +87,7 @@
* Next stage is to determine if we need to do a more granular boundary check * Next stage is to determine if we need to do a more granular boundary check
* on the 4MB (or 2MB depending on architecture) off the start and end pfn's. * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
* We check if the start pfn and end pfn violate that boundary check, and if * We check if the start pfn and end pfn violate that boundary check, and if
* so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer
* granularity of setting which PFNs are missing and which ones are identity. * granularity of setting which PFNs are missing and which ones are identity.
* In our example 263424 and 512256 both fail the check so we reserve_brk two * In our example 263424 and 512256 both fail the check so we reserve_brk two
* pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
...@@ -102,9 +101,10 @@ ...@@ -102,9 +101,10 @@
* *
* The next step is to walk from the start pfn to the end pfn setting * The next step is to walk from the start pfn to the end pfn setting
* the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
* If we find that the middle leaf is pointing to p2m_missing we can swap it * If we find that the middle entry is pointing to p2m_missing we can swap it
* over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and
* point we do not need to worry about boundary aligment (so no need to * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions).
* At this point we do not need to worry about boundary aligment (so no need to
* reserve_brk a middle page, figure out which PFNs are "missing" and which * reserve_brk a middle page, figure out which PFNs are "missing" and which
* ones are identity), as that has been done earlier. If we find that the * ones are identity), as that has been done earlier. If we find that the
* middle leaf is not occupied by p2m_identity or p2m_missing, we dereference * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
...@@ -118,6 +118,9 @@ ...@@ -118,6 +118,9 @@
* considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
* contain the INVALID_P2M_ENTRY value and are considered "missing." * contain the INVALID_P2M_ENTRY value and are considered "missing."
* *
* Finally, the region beyond the end of of the E820 (4 GB in this example)
* is set to be identity (in case there are MMIO regions placed here).
*
* This is what the p2m ends up looking (for the E820 above) with this * This is what the p2m ends up looking (for the E820 above) with this
* fabulous drawing: * fabulous drawing:
* *
...@@ -129,21 +132,27 @@ ...@@ -129,21 +132,27 @@
* |-----| \ | [p2m_identity]+\\ | .... | * |-----| \ | [p2m_identity]+\\ | .... |
* | 2 |--\ \-------------------->| ... | \\ \----------------/ * | 2 |--\ \-------------------->| ... | \\ \----------------/
* |-----| \ \---------------/ \\ * |-----| \ \---------------/ \\
* | 3 |\ \ \\ p2m_identity * | 3 |-\ \ \\ p2m_identity [1]
* |-----| \ \-------------------->/---------------\ /-----------------\ * |-----| \ \-------------------->/---------------\ /-----------------\
* | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... |
* \-----/ / | [p2m_identity]+-->| ..., ~0 | * \-----/ | | | [p2m_identity]+-->| ..., ~0 |
* / /---------------\ | .... | \-----------------/ * | | | .... | \-----------------/
* / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | * | | +-[x], ~0, ~0.. +\
* / | IDENTITY[@256]|<----/ \---------------/ * | | \---------------/ \
* / | ~0, ~0, .... | * | | \-> /---------------\
* | \---------------/ * | V p2m_mid_missing p2m_missing | IDENTITY[@0] |
* | * | /-----------------\ /------------\ | IDENTITY[@256]|
* p2m_mid_missing p2m_missing * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... |
* /-----------------\ /------------\ * | | [p2m_missing] +---->| ..., ~0 | \---------------/
* | [p2m_missing] +---->| ~0, ~0, ~0 | * | | ... | \------------/
* | [p2m_missing] +---->| ..., ~0 | * | \-----------------/
* \-----------------/ \------------/ * |
* | p2m_mid_identity
* | /-----------------\
* \-->| [p2m_identity] +---->[1]
* | [p2m_identity] +---->[1]
* | ... |
* \-----------------/
* *
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/ */
...@@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); ...@@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
/* We might hit two boundary violations at the start and end, at max each /* We might hit two boundary violations at the start and end, at max each
* boundary violation will require three middle nodes. */ * boundary violation will require three middle nodes. */
RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
/* When we populate back during bootup, the amount of pages can vary. The /* When we populate back during bootup, the amount of pages can vary. The
* max we have is seen is 395979, but that does not mean it can't be more. * max we have is seen is 395979, but that does not mean it can't be more.
...@@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top) ...@@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top)
top[i] = p2m_mid_missing_mfn; top[i] = p2m_mid_missing_mfn;
} }
static void p2m_mid_init(unsigned long **mid) static void p2m_mid_init(unsigned long **mid, unsigned long *leaf)
{ {
unsigned i; unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++) for (i = 0; i < P2M_MID_PER_PAGE; i++)
mid[i] = p2m_missing; mid[i] = leaf;
} }
static void p2m_mid_mfn_init(unsigned long *mid) static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{ {
unsigned i; unsigned i;
for (i = 0; i < P2M_MID_PER_PAGE; i++) for (i = 0; i < P2M_MID_PER_PAGE; i++)
mid[i] = virt_to_mfn(p2m_missing); mid[i] = virt_to_mfn(leaf);
} }
static void p2m_init(unsigned long *p2m) static void p2m_init(unsigned long *p2m)
...@@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void) ...@@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void)
/* Pre-initialize p2m_top_mfn to be completely missing */ /* Pre-initialize p2m_top_mfn to be completely missing */
if (p2m_top_mfn == NULL) { if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(p2m_mid_missing_mfn); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_mfn_p_init(p2m_top_mfn_p); p2m_top_mfn_p_init(p2m_top_mfn_p);
...@@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void) ...@@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void)
p2m_top_mfn_init(p2m_top_mfn); p2m_top_mfn_init(p2m_top_mfn);
} else { } else {
/* Reinitialise, mfn's all change after migration */ /* Reinitialise, mfn's all change after migration */
p2m_mid_mfn_init(p2m_mid_missing_mfn); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
} }
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
...@@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void) ...@@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void)
* it too late. * it too late.
*/ */
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p); p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p; p2m_top_mfn_p[topidx] = mid_mfn_p;
} }
...@@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void) ...@@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_missing); p2m_init(p2m_missing);
p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_identity);
p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(p2m_mid_missing); p2m_mid_init(p2m_mid_missing, p2m_missing);
p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(p2m_mid_identity, p2m_identity);
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top); p2m_top_init(p2m_top);
p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_init(p2m_identity);
/* /*
* The domain builder gives us a pre-constructed p2m array in * The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just * mfn_list for all the pages initially given to us, so we just
...@@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void) ...@@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
if (p2m_top[topidx] == p2m_mid_missing) { if (p2m_top[topidx] == p2m_mid_missing) {
unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid); p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid; p2m_top[topidx] = mid;
} }
...@@ -492,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn) ...@@ -492,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn)
unsigned topidx, mididx, idx; unsigned topidx, mididx, idx;
if (unlikely(pfn >= MAX_P2M_PFN)) if (unlikely(pfn >= MAX_P2M_PFN))
return INVALID_P2M_ENTRY; return IDENTITY_FRAME(pfn);
topidx = p2m_top_index(pfn); topidx = p2m_top_index(pfn);
mididx = p2m_mid_index(pfn); mididx = p2m_mid_index(pfn);
...@@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn) ...@@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid) if (!mid)
return false; return false;
p2m_mid_init(mid); p2m_mid_init(mid, p2m_missing);
if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
free_p2m_page(mid); free_p2m_page(mid);
...@@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn) ...@@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn)
if (!mid_mfn) if (!mid_mfn)
return false; return false;
p2m_mid_mfn_init(mid_mfn); p2m_mid_mfn_init(mid_mfn, p2m_missing);
missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
mid_mfn_mfn = virt_to_mfn(mid_mfn); mid_mfn_mfn = virt_to_mfn(mid_mfn);
...@@ -596,7 +611,7 @@ static bool alloc_p2m(unsigned long pfn) ...@@ -596,7 +611,7 @@ static bool alloc_p2m(unsigned long pfn)
return true; return true;
} }
static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
{ {
unsigned topidx, mididx, idx; unsigned topidx, mididx, idx;
unsigned long *p2m; unsigned long *p2m;
...@@ -638,7 +653,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary ...@@ -638,7 +653,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary
return true; return true;
} }
static bool __init early_alloc_p2m(unsigned long pfn) static bool __init early_alloc_p2m_middle(unsigned long pfn)
{ {
unsigned topidx = p2m_top_index(pfn); unsigned topidx = p2m_top_index(pfn);
unsigned long *mid_mfn_p; unsigned long *mid_mfn_p;
...@@ -649,7 +664,7 @@ static bool __init early_alloc_p2m(unsigned long pfn) ...@@ -649,7 +664,7 @@ static bool __init early_alloc_p2m(unsigned long pfn)
if (mid == p2m_mid_missing) { if (mid == p2m_mid_missing) {
mid = extend_brk(PAGE_SIZE, PAGE_SIZE); mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_init(mid); p2m_mid_init(mid, p2m_missing);
p2m_top[topidx] = mid; p2m_top[topidx] = mid;
...@@ -658,12 +673,12 @@ static bool __init early_alloc_p2m(unsigned long pfn) ...@@ -658,12 +673,12 @@ static bool __init early_alloc_p2m(unsigned long pfn)
/* And the save/restore P2M tables.. */ /* And the save/restore P2M tables.. */
if (mid_mfn_p == p2m_mid_missing_mfn) { if (mid_mfn_p == p2m_mid_missing_mfn) {
mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_mid_mfn_init(mid_mfn_p); p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
p2m_top_mfn_p[topidx] = mid_mfn_p; p2m_top_mfn_p[topidx] = mid_mfn_p;
p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
/* Note: we don't set mid_mfn_p[midix] here, /* Note: we don't set mid_mfn_p[midix] here,
* look in early_alloc_p2m_middle */ * look in early_alloc_p2m() */
} }
return true; return true;
} }
...@@ -739,7 +754,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_ ...@@ -739,7 +754,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
/* This shouldn't happen */ /* This shouldn't happen */
if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
early_alloc_p2m(set_pfn); early_alloc_p2m_middle(set_pfn);
if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
return false; return false;
...@@ -754,13 +769,13 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_ ...@@ -754,13 +769,13 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{ {
if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!early_alloc_p2m(pfn)) if (!early_alloc_p2m_middle(pfn))
return false; return false;
if (early_can_reuse_p2m_middle(pfn, mfn)) if (early_can_reuse_p2m_middle(pfn, mfn))
return __set_phys_to_machine(pfn, mfn); return __set_phys_to_machine(pfn, mfn);
if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
return false; return false;
if (!__set_phys_to_machine(pfn, mfn)) if (!__set_phys_to_machine(pfn, mfn))
...@@ -769,12 +784,30 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) ...@@ -769,12 +784,30 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
return true; return true;
} }
/*
 * If 'pfn' lies in the middle of a shared middle/leaf p2m page,
 * allocate real middle (and, if needed, leaf) pages so the range
 * boundary can be represented.
 */
static void __init early_split_p2m(unsigned long pfn)
{
	unsigned long mid_idx = p2m_mid_index(pfn);
	unsigned long leaf_idx = p2m_index(pfn);

	/* Aligned to a middle-page boundary: nothing to split. */
	if (mid_idx == 0 && leaf_idx == 0)
		return;

	early_alloc_p2m_middle(pfn);
	if (leaf_idx)
		early_alloc_p2m(pfn, false);
}
unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e) unsigned long pfn_e)
{ {
unsigned long pfn; unsigned long pfn;
if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) if (unlikely(pfn_s >= MAX_P2M_PFN))
return 0; return 0;
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
...@@ -783,19 +816,30 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, ...@@ -783,19 +816,30 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
if (pfn_s > pfn_e) if (pfn_s > pfn_e)
return 0; return 0;
for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); if (pfn_e > MAX_P2M_PFN)
pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); pfn_e = MAX_P2M_PFN;
pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
{
WARN_ON(!early_alloc_p2m(pfn));
}
early_alloc_p2m_middle(pfn_s, true); early_split_p2m(pfn_s);
early_alloc_p2m_middle(pfn_e, true); early_split_p2m(pfn_e);
for (pfn = pfn_s; pfn < pfn_e;) {
unsigned topidx = p2m_top_index(pfn);
unsigned mididx = p2m_mid_index(pfn);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
break; break;
pfn++;
/*
* If the PFN was set to a middle or leaf identity
* page the remainder must also be identity, so skip
* ahead to the next middle or leaf entry.
*/
if (p2m_top[topidx] == p2m_mid_identity)
pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE);
else if (p2m_top[topidx][mididx] == p2m_identity)
pfn = ALIGN(pfn, P2M_PER_PAGE);
}
if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
"Identity mapping failed. We are %ld short of 1-1 mappings!\n", "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
...@@ -825,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) ...@@ -825,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
/* For sparse holes were the p2m leaf has real PFN along with /* For sparse holes were the p2m leaf has real PFN along with
* PCI holes, stick in the PFN as the MFN value. * PCI holes, stick in the PFN as the MFN value.
*
* set_phys_range_identity() will have allocated new middle
* and leaf pages as required so an existing p2m_mid_missing
* or p2m_missing mean that whole range will be identity so
* these can be switched to p2m_mid_identity or p2m_identity.
*/ */
if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
if (p2m_top[topidx] == p2m_mid_identity)
return true;
if (p2m_top[topidx] == p2m_mid_missing) {
WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing,
p2m_mid_identity) != p2m_mid_missing);
return true;
}
if (p2m_top[topidx][mididx] == p2m_identity) if (p2m_top[topidx][mididx] == p2m_identity)
return true; return true;
......
...@@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size) ...@@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn); unsigned long mfn = pfn_to_mfn(pfn);
if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
continue; continue;
WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
pfn, mfn); pfn, mfn);
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY); __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
} }
...@@ -468,6 +468,15 @@ char * __init xen_memory_setup(void) ...@@ -468,6 +468,15 @@ char * __init xen_memory_setup(void)
i++; i++;
} }
/*
* Set the rest as identity mapped, in case PCI BARs are
* located here.
*
* PFNs above MAX_P2M_PFN are considered identity mapped as
* well.
*/
set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
/* /*
* In domU, the ISA region is normal, usable memory, but we * In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke * reserve ISA memory anyway because too many things poke
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
#include "xen-ops.h" #include "xen-ops.h"
#include "mmu.h" #include "mmu.h"
void xen_arch_pre_suspend(void) static void xen_pv_pre_suspend(void)
{ {
xen_mm_pin_all();
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn = xen_start_info->console.domU.mfn =
mfn_to_pfn(xen_start_info->console.domU.mfn); mfn_to_pfn(xen_start_info->console.domU.mfn);
...@@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void) ...@@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void)
BUG(); BUG();
} }
void xen_arch_hvm_post_suspend(int suspend_cancelled) static void xen_hvm_post_suspend(int suspend_cancelled)
{ {
#ifdef CONFIG_XEN_PVHVM #ifdef CONFIG_XEN_PVHVM
int cpu; int cpu;
...@@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) ...@@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
#endif #endif
} }
void xen_arch_post_suspend(int suspend_cancelled) static void xen_pv_post_suspend(int suspend_cancelled)
{ {
xen_build_mfn_list_list(); xen_build_mfn_list_list();
...@@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled) ...@@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled)
xen_vcpu_restore(); xen_vcpu_restore();
} }
xen_mm_unpin_all();
}
void xen_arch_pre_suspend(void)
{
if (xen_pv_domain())
xen_pv_pre_suspend();
}
void xen_arch_post_suspend(int cancelled)
{
if (xen_pv_domain())
xen_pv_post_suspend(cancelled);
else
xen_hvm_post_suspend(cancelled);
} }
static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_restore(void *data)
......
...@@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); ...@@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void); void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn; extern unsigned long xen_max_p2m_pfn;
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
void xen_set_pat(u64); void xen_set_pat(u64);
char * __init xen_memory_setup(void); char * __init xen_memory_setup(void);
......
...@@ -41,9 +41,6 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; ...@@ -41,9 +41,6 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
struct suspend_info { struct suspend_info {
int cancelled; int cancelled;
unsigned long arg; /* extra hypercall argument */
void (*pre)(void);
void (*post)(int cancelled);
}; };
static RAW_NOTIFIER_HEAD(xen_resume_notifier); static RAW_NOTIFIER_HEAD(xen_resume_notifier);
...@@ -61,26 +58,6 @@ void xen_resume_notifier_unregister(struct notifier_block *nb) ...@@ -61,26 +58,6 @@ void xen_resume_notifier_unregister(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister); EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister);
#ifdef CONFIG_HIBERNATE_CALLBACKS #ifdef CONFIG_HIBERNATE_CALLBACKS
static void xen_hvm_post_suspend(int cancelled)
{
xen_arch_hvm_post_suspend(cancelled);
gnttab_resume();
}
static void xen_pre_suspend(void)
{
xen_mm_pin_all();
gnttab_suspend();
xen_arch_pre_suspend();
}
static void xen_post_suspend(int cancelled)
{
xen_arch_post_suspend(cancelled);
gnttab_resume();
xen_mm_unpin_all();
}
static int xen_suspend(void *data) static int xen_suspend(void *data)
{ {
struct suspend_info *si = data; struct suspend_info *si = data;
...@@ -94,18 +71,20 @@ static int xen_suspend(void *data) ...@@ -94,18 +71,20 @@ static int xen_suspend(void *data)
return err; return err;
} }
if (si->pre) gnttab_suspend();
si->pre(); xen_arch_pre_suspend();
/* /*
* This hypercall returns 1 if suspend was cancelled * This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it * or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain. * is resuming in a new domain.
*/ */
si->cancelled = HYPERVISOR_suspend(si->arg); si->cancelled = HYPERVISOR_suspend(xen_pv_domain()
? virt_to_mfn(xen_start_info)
: 0);
if (si->post) xen_arch_post_suspend(si->cancelled);
si->post(si->cancelled); gnttab_resume();
if (!si->cancelled) { if (!si->cancelled) {
xen_irq_resume(); xen_irq_resume();
...@@ -154,16 +133,6 @@ static void do_suspend(void) ...@@ -154,16 +133,6 @@ static void do_suspend(void)
si.cancelled = 1; si.cancelled = 1;
if (xen_hvm_domain()) {
si.arg = 0UL;
si.pre = NULL;
si.post = &xen_hvm_post_suspend;
} else {
si.arg = virt_to_mfn(xen_start_info);
si.pre = &xen_pre_suspend;
si.post = &xen_post_suspend;
}
err = stop_machine(xen_suspend, &si, cpumask_of(0)); err = stop_machine(xen_suspend, &si, cpumask_of(0));
raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); raw_notifier_call_chain(&xen_resume_notifier, 0, NULL);
......
...@@ -127,7 +127,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr) ...@@ -127,7 +127,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
pr_debug(" C%d: %s %d uS\n", pr_debug(" C%d: %s %d uS\n",
cx->type, cx->desc, (u32)cx->latency); cx->type, cx->desc, (u32)cx->latency);
} }
} else if (ret != -EINVAL) } else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen * table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */ * with broken ACPI tables. */
...@@ -259,7 +259,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr) ...@@ -259,7 +259,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
(u32) perf->states[i].power, (u32) perf->states[i].power,
(u32) perf->states[i].transition_latency); (u32) perf->states[i].transition_latency);
} }
} else if (ret != -EINVAL) } else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen * table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */ * with broken ACPI tables. */
......
...@@ -242,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, ...@@ -242,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
return found_dev; return found_dev;
} }
/*
* Called when:
* - XenBus state has been reconfigure (pci unplug). See xen_pcibk_remove_device
* - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
* - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
* - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
*
* As such we have to be careful.
*/
void pcistub_put_pci_dev(struct pci_dev *dev) void pcistub_put_pci_dev(struct pci_dev *dev)
{ {
struct pcistub_device *psdev, *found_psdev = NULL; struct pcistub_device *psdev, *found_psdev = NULL;
...@@ -272,16 +281,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev) ...@@ -272,16 +281,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* and want to inhibit the user from fiddling with 'reset' * and want to inhibit the user from fiddling with 'reset'
*/ */
pci_reset_function(dev); pci_reset_function(dev);
pci_restore_state(psdev->dev); pci_restore_state(dev);
/* This disables the device. */ /* This disables the device. */
xen_pcibk_reset_device(found_psdev->dev); xen_pcibk_reset_device(dev);
/* And cleanup up our emulated fields. */ /* And cleanup up our emulated fields. */
xen_pcibk_config_free_dyn_fields(found_psdev->dev); xen_pcibk_config_reset_dev(dev);
xen_pcibk_config_reset_dev(found_psdev->dev); xen_pcibk_config_free_dyn_fields(dev);
xen_unregister_device_domain_owner(found_psdev->dev); xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags); spin_lock_irqsave(&found_psdev->lock, flags);
found_psdev->pdev = NULL; found_psdev->pdev = NULL;
...@@ -493,6 +502,8 @@ static int pcistub_seize(struct pci_dev *dev) ...@@ -493,6 +502,8 @@ static int pcistub_seize(struct pci_dev *dev)
return err; return err;
} }
/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
* other functions that take the sysfs lock. */
static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
{ {
int err = 0; int err = 0;
...@@ -520,6 +531,8 @@ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) ...@@ -520,6 +531,8 @@ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
return err; return err;
} }
/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
* other functions that take the sysfs lock. */
static void pcistub_remove(struct pci_dev *dev) static void pcistub_remove(struct pci_dev *dev)
{ {
struct pcistub_device *psdev, *found_psdev = NULL; struct pcistub_device *psdev, *found_psdev = NULL;
...@@ -551,6 +564,8 @@ static void pcistub_remove(struct pci_dev *dev) ...@@ -551,6 +564,8 @@ static void pcistub_remove(struct pci_dev *dev)
pr_warn("****** shutdown driver domain before binding device\n"); pr_warn("****** shutdown driver domain before binding device\n");
pr_warn("****** to other drivers or domains\n"); pr_warn("****** to other drivers or domains\n");
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev, xen_pcibk_release_pci_dev(found_psdev->pdev,
found_psdev->dev); found_psdev->dev);
} }
......
...@@ -93,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev) ...@@ -93,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev)
xen_pcibk_disconnect(pdev); xen_pcibk_disconnect(pdev);
/* N.B. This calls pcistub_put_pci_dev which does the FLR on all
* of the PCIe devices. */
xen_pcibk_release_devices(pdev); xen_pcibk_release_devices(pdev);
dev_set_drvdata(&pdev->xdev->dev, NULL); dev_set_drvdata(&pdev->xdev->dev, NULL);
...@@ -286,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, ...@@ -286,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev); xen_unregister_device_domain_owner(dev);
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(pdev, dev); xen_pcibk_release_pci_dev(pdev, dev);
out: out:
......
...@@ -275,9 +275,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update); ...@@ -275,9 +275,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
* NB. The fields are natural register size for this architecture. * NB. The fields are natural register size for this architecture.
*/ */
struct multicall_entry { struct multicall_entry {
unsigned long op; xen_ulong_t op;
long result; xen_long_t result;
unsigned long args[6]; xen_ulong_t args[6];
}; };
DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
......
...@@ -9,10 +9,6 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); ...@@ -9,10 +9,6 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
void xen_arch_pre_suspend(void); void xen_arch_pre_suspend(void);
void xen_arch_post_suspend(int suspend_cancelled); void xen_arch_post_suspend(int suspend_cancelled);
void xen_arch_hvm_post_suspend(int suspend_cancelled);
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
void xen_timer_resume(void); void xen_timer_resume(void);
void xen_arch_resume(void); void xen_arch_resume(void);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册