提交 5c69bed2 编写于 作者: L Linus Torvalds

Merge tag 'stable/for-linus-3.8-rc3-tag' of...

Merge tag 'stable/for-linus-3.8-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull Xen fixes from Konrad Rzeszutek Wilk:
 - CVE-2013-0190/XSA-40 (or stack corruption for 32-bit PV kernels)
 - Fix racy vma access spotted by Al Viro
 - Fix mmap batch ioctl potentially resulting in large O(n) page allcations.
 - Fix vcpu online/offline BUG:scheduling while atomic..
 - Fix unbound buffer scanning for more than 32 vCPUs.
 - Fix grant table being incorrectly initialized
 - Fix incorrect check in pciback
 - Allow privcmd in backend domains.

Fix up whitespace conflict due to ugly merge resolution in Xen tree in
arch/arm/xen/enlighten.c

* tag 'stable/for-linus-3.8-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: Fix stack corruption in xen_failsafe_callback for 32bit PVOPS guests.
  Revert "xen/smp: Fix CPU online/offline bug triggering a BUG: scheduling while atomic."
  xen/gntdev: remove erronous use of copy_to_user
  xen/gntdev: correctly unmap unlinked maps in mmu notifier
  xen/gntdev: fix unsafe vma access
  xen/privcmd: Fix mmap batch ioctl.
  Xen: properly bound buffer access when parsing cpu/*/availability
  xen/grant-table: correctly initialize grant table version 1
  x86/xen : Fix the wrong check in pciback
  xen/privcmd: Relax access control in privcmd_ioctl_mmap
......@@ -1065,7 +1065,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
......
......@@ -432,13 +432,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
/*
* Balance out the preempt calls - as we are running in cpu_idle
* loop which has been called at bootup from cpu_bringup_and_idle.
* The cpucpu_bringup_and_idle called cpu_bringup which made a
* preempt_disable() So this preempt_enable will balance it out.
*/
preempt_enable();
}
#else /* !CONFIG_HOTPLUG_CPU */
......
......@@ -25,10 +25,10 @@ static void disable_hotplug_cpu(int cpu)
static int vcpu_online(unsigned int cpu)
{
int err;
char dir[32], state[32];
char dir[16], state[16];
sprintf(dir, "cpu/%u", cpu);
err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
err = xenbus_scanf(XBT_NIL, dir, "availability", "%15s", state);
if (err != 1) {
if (!xen_initial_domain())
printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
......
......@@ -56,10 +56,15 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
static int use_ptemod;
#define populate_freeable_maps use_ptemod
struct gntdev_priv {
/* maps with visible offsets in the file descriptor */
struct list_head maps;
/* lock protects maps from concurrent changes */
/* maps that are not visible; will be freed on munmap.
* Only populated if populate_freeable_maps == 1 */
struct list_head freeable_maps;
/* lock protects maps and freeable_maps */
spinlock_t lock;
struct mm_struct *mm;
struct mmu_notifier mn;
......@@ -193,7 +198,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
return NULL;
}
static void gntdev_put_map(struct grant_map *map)
static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map)
{
if (!map)
return;
......@@ -208,6 +213,12 @@ static void gntdev_put_map(struct grant_map *map)
evtchn_put(map->notify.event);
}
if (populate_freeable_maps && priv) {
spin_lock(&priv->lock);
list_del(&map->next);
spin_unlock(&priv->lock);
}
if (map->pages && !use_ptemod)
unmap_grant_pages(map, 0, map->count);
gntdev_free_map(map);
......@@ -301,17 +312,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
int pgno = (map->notify.addr >> PAGE_SHIFT);
if (pgno >= offset && pgno < offset + pages && use_ptemod) {
void __user *tmp = (void __user *)
map->vma->vm_start + map->notify.addr;
err = copy_to_user(tmp, &err, 1);
if (err)
return -EFAULT;
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
} else if (pgno >= offset && pgno < offset + pages) {
uint8_t *tmp = kmap(map->pages[pgno]);
if (pgno >= offset && pgno < offset + pages) {
/* No need for kmap, pages are in lowmem */
uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
kunmap(map->pages[pgno]);
map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
}
}
......@@ -376,11 +380,24 @@ static void gntdev_vma_open(struct vm_area_struct *vma)
static void gntdev_vma_close(struct vm_area_struct *vma)
{
struct grant_map *map = vma->vm_private_data;
struct file *file = vma->vm_file;
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
if (use_ptemod) {
/* It is possible that an mmu notifier could be running
* concurrently, so take priv->lock to ensure that the vma won't
* vanishing during the unmap_grant_pages call, since we will
* spin here until that completes. Such a concurrent call will
* not do any unmapping, since that has been done prior to
* closing the vma, but it may still iterate the unmap_ops list.
*/
spin_lock(&priv->lock);
map->vma = NULL;
spin_unlock(&priv->lock);
}
vma->vm_private_data = NULL;
gntdev_put_map(map);
gntdev_put_map(priv, map);
}
static struct vm_operations_struct gntdev_vmops = {
......@@ -390,23 +407,18 @@ static struct vm_operations_struct gntdev_vmops = {
/* ------------------------------------------------------------------ */
static void mn_invl_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
static void unmap_if_in_range(struct grant_map *map,
unsigned long start, unsigned long end)
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct grant_map *map;
unsigned long mstart, mend;
int err;
spin_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
if (!map->vma)
continue;
return;
if (map->vma->vm_start >= end)
continue;
return;
if (map->vma->vm_end <= start)
continue;
return;
mstart = max(start, map->vma->vm_start);
mend = min(end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
......@@ -417,6 +429,21 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
}
static void mn_invl_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct grant_map *map;
spin_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
unmap_if_in_range(map, start, end);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
unmap_if_in_range(map, start, end);
}
spin_unlock(&priv->lock);
}
......@@ -445,6 +472,15 @@ static void mn_release(struct mmu_notifier *mn,
err = unmap_grant_pages(map, /* offset */ 0, map->count);
WARN_ON(err);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
if (!map->vma)
continue;
pr_debug("map %d+%d (%lx %lx)\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end);
err = unmap_grant_pages(map, /* offset */ 0, map->count);
WARN_ON(err);
}
spin_unlock(&priv->lock);
}
......@@ -466,6 +502,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
return -ENOMEM;
INIT_LIST_HEAD(&priv->maps);
INIT_LIST_HEAD(&priv->freeable_maps);
spin_lock_init(&priv->lock);
if (use_ptemod) {
......@@ -500,8 +537,9 @@ static int gntdev_release(struct inode *inode, struct file *flip)
while (!list_empty(&priv->maps)) {
map = list_entry(priv->maps.next, struct grant_map, next);
list_del(&map->next);
gntdev_put_map(map);
gntdev_put_map(NULL /* already removed */, map);
}
WARN_ON(!list_empty(&priv->freeable_maps));
if (use_ptemod)
mmu_notifier_unregister(&priv->mn, priv->mm);
......@@ -529,14 +567,14 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
pr_debug("can't map: over limit\n");
gntdev_put_map(map);
gntdev_put_map(NULL, map);
return err;
}
if (copy_from_user(map->grants, &u->refs,
sizeof(map->grants[0]) * op.count) != 0) {
gntdev_put_map(map);
return err;
gntdev_put_map(NULL, map);
return -EFAULT;
}
spin_lock(&priv->lock);
......@@ -565,11 +603,13 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
if (populate_freeable_maps)
list_add_tail(&map->next, &priv->freeable_maps);
err = 0;
}
spin_unlock(&priv->lock);
if (map)
gntdev_put_map(map);
gntdev_put_map(priv, map);
return err;
}
......@@ -579,25 +619,31 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
struct ioctl_gntdev_get_offset_for_vaddr op;
struct vm_area_struct *vma;
struct grant_map *map;
int rv = -EINVAL;
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, op.vaddr);
if (!vma || vma->vm_ops != &gntdev_vmops)
return -EINVAL;
goto out_unlock;
map = vma->vm_private_data;
if (!map)
return -EINVAL;
goto out_unlock;
op.offset = map->index << PAGE_SHIFT;
op.count = map->count;
rv = 0;
if (copy_to_user(u, &op, sizeof(op)) != 0)
out_unlock:
up_read(&current->mm->mmap_sem);
if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
return -EFAULT;
return 0;
return rv;
}
static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
......@@ -778,7 +824,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_put_map:
if (use_ptemod)
map->vma = NULL;
gntdev_put_map(map);
gntdev_put_map(priv, map);
return err;
}
......
......@@ -56,10 +56,6 @@
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
#define GREFS_PER_GRANT_FRAME \
(grant_table_version == 1 ? \
(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \
(PAGE_SIZE / sizeof(union grant_entry_v2)))
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
......@@ -154,6 +150,7 @@ static struct gnttab_ops *gnttab_interface;
static grant_status_t *grstatus;
static int grant_table_version;
static int grefs_per_grant_frame;
static struct gnttab_free_callback *gnttab_free_callback_list;
......@@ -767,12 +764,14 @@ static int grow_gnttab_list(unsigned int more_frames)
unsigned int new_nr_grant_frames, extra_entries, i;
unsigned int nr_glist_frames, new_nr_glist_frames;
BUG_ON(grefs_per_grant_frame == 0);
new_nr_grant_frames = nr_grant_frames + more_frames;
extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
extra_entries = more_frames * grefs_per_grant_frame;
nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
new_nr_glist_frames =
(new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
(new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
if (!gnttab_list[i])
......@@ -780,12 +779,12 @@ static int grow_gnttab_list(unsigned int more_frames)
}
for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
for (i = grefs_per_grant_frame * nr_grant_frames;
i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(i) = gnttab_free_head;
gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
gnttab_free_head = grefs_per_grant_frame * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames;
......@@ -957,7 +956,8 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
static unsigned nr_status_frames(unsigned nr_grant_frames)
{
return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
BUG_ON(grefs_per_grant_frame == 0);
return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP;
}
static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
......@@ -1115,6 +1115,7 @@ static void gnttab_request_version(void)
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2) {
grant_table_version = 2;
grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2);
gnttab_interface = &gnttab_v2_ops;
} else if (grant_table_version == 2) {
/*
......@@ -1127,17 +1128,17 @@ static void gnttab_request_version(void)
panic("we need grant tables version 2, but only version 1 is available");
} else {
grant_table_version = 1;
grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
gnttab_interface = &gnttab_v1_ops;
}
printk(KERN_INFO "Grant tables using version %d layout.\n",
grant_table_version);
}
int gnttab_resume(void)
static int gnttab_setup(void)
{
unsigned int max_nr_gframes;
gnttab_request_version();
max_nr_gframes = gnttab_max_grant_frames();
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
......@@ -1160,6 +1161,12 @@ int gnttab_resume(void)
return 0;
}
int gnttab_resume(void)
{
gnttab_request_version();
return gnttab_setup();
}
int gnttab_suspend(void)
{
gnttab_interface->unmap_frames();
......@@ -1171,9 +1178,10 @@ static int gnttab_expand(unsigned int req_entries)
int rc;
unsigned int cur, extra;
BUG_ON(grefs_per_grant_frame == 0);
cur = nr_grant_frames;
extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
GREFS_PER_GRANT_FRAME);
extra = ((req_entries + (grefs_per_grant_frame-1)) /
grefs_per_grant_frame);
if (cur + extra > gnttab_max_grant_frames())
return -ENOSPC;
......@@ -1191,21 +1199,23 @@ int gnttab_init(void)
unsigned int nr_init_grefs;
int ret;
gnttab_request_version();
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
BUG_ON(grefs_per_grant_frame == 0);
max_nr_glist_frames = (boot_max_nr_grant_frames *
GREFS_PER_GRANT_FRAME / RPP);
grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
GFP_KERNEL);
if (gnttab_list == NULL)
return -ENOMEM;
nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP;
for (i = 0; i < nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
if (gnttab_list[i] == NULL) {
......@@ -1214,12 +1224,12 @@ int gnttab_init(void)
}
}
if (gnttab_resume() < 0) {
if (gnttab_setup() < 0) {
ret = -ENODEV;
goto ini_nomem;
}
nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
nr_init_grefs = nr_grant_frames * grefs_per_grant_frame;
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
gnttab_entry(i) = i + 1;
......
......@@ -199,9 +199,6 @@ static long privcmd_ioctl_mmap(void __user *udata)
LIST_HEAD(pagelist);
struct mmap_mfn_state state;
if (!xen_initial_domain())
return -EPERM;
/* We only support privcmd_ioctl_mmap_batch for auto translated. */
if (xen_feature(XENFEAT_auto_translated_physmap))
return -ENOSYS;
......@@ -261,11 +258,12 @@ struct mmap_batch_state {
* -ENOENT if at least 1 -ENOENT has happened.
*/
int global_error;
/* An array for individual errors */
int *err;
int version;
/* User-space mfn array to store errors in the second pass for V1. */
xen_pfn_t __user *user_mfn;
/* User-space int array to store errors in the second pass for V2. */
int __user *user_err;
};
/* auto translated dom0 note: if domU being created is PV, then mfn is
......@@ -288,7 +286,19 @@ static int mmap_batch_fn(void *data, void *state)
&cur_page);
/* Store error code for second pass. */
*(st->err++) = ret;
if (st->version == 1) {
if (ret < 0) {
/*
* V1 encodes the error codes in the 32bit top nibble of the
* mfn (with its known limitations vis-a-vis 64 bit callers).
*/
*mfnp |= (ret == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
}
} else { /* st->version == 2 */
*((int *) mfnp) = ret;
}
/* And see if it affects the global_error. */
if (ret < 0) {
......@@ -305,20 +315,25 @@ static int mmap_batch_fn(void *data, void *state)
return 0;
}
static int mmap_return_errors_v1(void *data, void *state)
static int mmap_return_errors(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
int err = *(st->err++);
/*
* V1 encodes the error codes in the 32bit top nibble of the
* mfn (with its known limitations vis-a-vis 64 bit callers).
*/
*mfnp |= (err == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
return __put_user(*mfnp, st->user_mfn++);
if (st->version == 1) {
xen_pfn_t mfnp = *((xen_pfn_t *) data);
if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
return __put_user(mfnp, st->user_mfn++);
else
st->user_mfn++;
} else { /* st->version == 2 */
int err = *((int *) data);
if (err)
return __put_user(err, st->user_err++);
else
st->user_err++;
}
return 0;
}
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
......@@ -357,12 +372,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
struct vm_area_struct *vma;
unsigned long nr_pages;
LIST_HEAD(pagelist);
int *err_array = NULL;
struct mmap_batch_state state;
if (!xen_initial_domain())
return -EPERM;
switch (version) {
case 1:
if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
......@@ -396,11 +407,13 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
goto out;
}
err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
if (err_array == NULL) {
ret = -ENOMEM;
if (version == 2) {
/* Zero error array now to only copy back actual errors. */
if (clear_user(m.err, sizeof(int) * m.num)) {
ret = -EFAULT;
goto out;
}
}
down_write(&mm->mmap_sem);
......@@ -427,7 +440,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
state.va = m.addr;
state.index = 0;
state.global_error = 0;
state.err = err_array;
state.version = version;
/* mmap_batch_fn guarantees ret == 0 */
BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
......@@ -435,29 +448,21 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
up_write(&mm->mmap_sem);
if (version == 1) {
if (state.global_error) {
/* Write back errors in second pass. */
state.user_mfn = (xen_pfn_t *)m.arr;
state.err = err_array;
state.user_err = m.err;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors_v1, &state);
&pagelist, mmap_return_errors, &state);
} else
ret = 0;
} else if (version == 2) {
ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
if (ret)
ret = -EFAULT;
}
/* If we have not had any EFAULT-like global errors then set the global
* error to -ENOENT if necessary. */
if ((ret == 0) && (state.global_error == -ENOENT))
ret = -ENOENT;
out:
kfree(err_array);
free_page_list(&pagelist);
return ret;
......
......@@ -124,7 +124,7 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
if (xen_pcibk_backend && xen_pcibk_backend->release)
return xen_pcibk_backend->release(pdev, dev);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册