diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 950cde6d6e58384083202b40af0921ef31e0e0ca..ba9373f82ab5fa9fe62276cc27333447a22475ba 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -42,6 +42,7 @@ #include #include "linux/lguest_launcher.h" #include "linux/virtio_config.h" +#include #include "linux/virtio_net.h" #include "linux/virtio_blk.h" #include "linux/virtio_console.h" @@ -133,6 +134,9 @@ struct device { /* Is it operational */ bool running; + /* Does Guest want an interrupt on empty? */ + bool irq_on_empty; + /* Device-specific data. */ void *priv; }; @@ -623,10 +627,13 @@ static void trigger_irq(struct virtqueue *vq) return; vq->pending_used = 0; - /* If they don't want an interrupt, don't send one, unless empty. */ - if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) - && lg_last_avail(vq) != vq->vring.avail->idx) - return; + /* If they don't want an interrupt, don't send one... */ + if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { + /* ... unless they've asked us to force one on empty. */ + if (!vq->dev->irq_on_empty + || lg_last_avail(vq) != vq->vring.avail->idx) + return; + } /* Send the Guest an interrupt tell them we used something up. 
*/ if (write(lguest_fd, buf, sizeof(buf)) != 0) @@ -1042,6 +1049,20 @@ static void create_thread(struct virtqueue *vq) close(vq->eventfd); } +/* + * Has the Guest accepted feature @bit? The accepted bits start feature_len + * bytes after get_feature_bits(dev); that area is presumably feature_len + * bytes long too, so byte index feature_len is already out of range. + */ +static bool accepted_feature(struct device *dev, unsigned int bit) +{ + const u8 *features = get_feature_bits(dev) + dev->feature_len; + + if (dev->feature_len <= bit / CHAR_BIT) + return false; + return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); +} + static void start_device(struct device *dev) { unsigned int i; @@ -1055,6 +1076,8 @@ static void start_device(struct device *dev) verbose(" %02x", get_feature_bits(dev) [dev->feature_len+i]); + dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); + for (vq = dev->vq; vq; vq = vq->next) { if (vq->service) create_thread(vq); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4cb7d5d18b8e691b9a2123060e9d701a3d34472e..7e59dc1d3fc2f6fcba984f1d729c65fb59f44b35 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1135,11 +1135,6 @@ static struct notifier_block paniced = { /* Setting up memory is fairly easy. */ static __init char *lguest_memory_setup(void) { - /* We do this here and not earlier because lockcheck used to barf if we - * did it before start_kernel(). I think we fixed that, so it'd be - * nice to move it back to lguest_init. Patch welcome... */ - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - /* *The Linux bootloader header contains an "e820" memory map: the * Launcher populated the first entry with our memory limit. @@ -1364,10 +1359,13 @@ __init void lguest_init(void) /* * If we don't initialize the lock dependency checker now, it crashes - * paravirt_disable_iospace. + * atomic_notifier_chain_register, then paravirt_disable_iospace. */ lockdep_init(); + /* Hook in our special panic hypercall code. 
*/ + atomic_notifier_chain_register(&panic_notifier_list, &paniced); + /* * The IDE code spends about 3 seconds probing for disks: if we reserve * all the I/O ports up front it can't get them and so doesn't probe. diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index aa89fe45237d516907cacffa238985e98fdb0fec..43f19389647a99b4efb7e78ce556e94153189197 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -91,15 +92,26 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, return false; vbr->req = req; - if (blk_fs_request(vbr->req)) { + switch (req->cmd_type) { + case REQ_TYPE_FS: vbr->out_hdr.type = 0; vbr->out_hdr.sector = blk_rq_pos(vbr->req); vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); - } else if (blk_pc_request(vbr->req)) { + break; + case REQ_TYPE_BLOCK_PC: vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); - } else { + break; + case REQ_TYPE_LINUX_BLOCK: + if (req->cmd[0] == REQ_LB_OP_FLUSH) { + vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + break; + } + /*FALLTHRU*/ + default: /* We don't put anything else in the queue. 
*/ BUG(); } @@ -139,7 +151,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, } } - if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) { + if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { mempool_free(vbr, vblk->pool); return false; } @@ -199,6 +211,12 @@ static int virtblk_identify(struct gendisk *disk, void *argp) return err; } +static void virtblk_prepare_flush(struct request_queue *q, struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; +} + static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long data) { @@ -337,7 +355,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) index++; /* If barriers are supported, tell block layer that queue is ordered */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH, + virtblk_prepare_flush); + else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); /* If disk is read-only in the host, the guest should obey */ @@ -424,7 +445,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, - VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY + VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH }; /* diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 32216b623248497b1bade240792aa0e77a0c080e..962968f05b9421dd0182677a874be7795cce0990 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* The host will fill any buffer we give it with sweet, sweet randomness. 
We @@ -51,7 +52,7 @@ static void register_buffer(void) sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left); /* There should always be room for one buffer. */ - if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0) + if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0) BUG(); vq->vq_ops->kick(vq); } diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index c74dacfa67950f870dcc4ffd7a71fa18dc915b27..0d328b59568d46f33c7b6e44c676c6709dea2371 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "hvc_console.h" @@ -65,7 +66,7 @@ static int put_chars(u32 vtermno, const char *buf, int count) /* add_buf wants a token to identify this buffer: we hand it any * non-NULL pointer, since there's only ever one buffer. */ - if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) { + if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) { /* Tell Host to go! */ out_vq->vq_ops->kick(out_vq); /* Chill out until it's done with the buffer. */ @@ -85,7 +86,7 @@ static void add_inbuf(void) sg_init_one(sg, inbuf, PAGE_SIZE); /* We should always be able to add one buffer to an empty queue. */ - if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0) + if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0) BUG(); in_vq->vq_ops->kick(in_vq); } diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 1e2cb846b3c9d7b593d3dd6e5b588a1f3f227991..8744d24ac6e639213e4366cc7dd12d06ec039a4e 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -67,12 +67,11 @@ static __init int map_switcher(void) * so we make sure they're zeroed. 
*/ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { - unsigned long addr = get_zeroed_page(GFP_KERNEL); - if (!addr) { + switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!switcher_page[i]) { err = -ENOMEM; goto free_some_pages; } - switcher_page[i] = virt_to_page(addr); } /* diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 8aaad65c3bb592b35af2eb259bea80d26cbfb742..cf94326f1b597f1b46ccc3ddc996b5aa0ef84f5f 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -380,7 +380,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) * And we copy the flags to the shadow PMD entry. The page * number in the shadow PMD is the page we just allocated. */ - native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); + set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); } /* @@ -447,7 +447,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) * we will come back here when a write does actually occur, so * we can update the Guest's _PAGE_DIRTY flag. */ - native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); + set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); /* * Finally, we write the Guest PTE entry back: we've set the @@ -528,7 +528,7 @@ static void release_pmd(pmd_t *spmd) /* Now we can free the page of PTEs */ free_page((long)ptepage); /* And zero out the PMD entry so we never release it twice. */ - native_set_pmd(spmd, __pmd(0)); + set_pmd(spmd, __pmd(0)); } } @@ -833,15 +833,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, */ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { check_gpte(cpu, gpte); - native_set_pte(spte, - gpte_to_spte(cpu, gpte, + set_pte(spte, + gpte_to_spte(cpu, gpte, pte_flags(gpte) & _PAGE_DIRTY)); } else { /* * Otherwise kill it and we can demand_page() * it in later. 
*/ - native_set_pte(spte, __pte(0)); + set_pte(spte, __pte(0)); } #ifdef CONFIG_X86_PAE } @@ -983,25 +983,22 @@ static unsigned long setup_pagetables(struct lguest *lg, */ for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; i += PTRS_PER_PTE, j++) { - /* FIXME: native_set_pmd is overkill here. */ - native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i) - - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); + pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE, + __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0) return -EFAULT; } /* One PGD entry, pointing to that PMD page. */ - set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT)); + pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT); /* Copy it in as the first PGD entry (ie. addresses 0-1G). */ if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0) return -EFAULT; /* - * And the third PGD entry (ie. addresses 3G-4G). - * - * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000. + * And the other PGD entry to make the linear mapping at PAGE_OFFSET */ - if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0) + if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd))) return -EFAULT; #else /* @@ -1141,15 +1138,13 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); pte_t regs_pte; - unsigned long pfn; #ifdef CONFIG_X86_PAE pmd_t switcher_pmd; pmd_t *pmd_table; - /* FIXME: native_set_pmd is overkill here. */ - native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >> - PAGE_SHIFT, PAGE_KERNEL_EXEC)); + switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT, + PAGE_KERNEL_EXEC); /* Figure out where the pmd page is, by reading the PGD, and converting * it to a virtual address. 
*/ @@ -1157,7 +1152,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) << PAGE_SHIFT); /* Now write it into the shadow page table. */ - native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); + set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); #else pgd_t switcher_pgd; @@ -1179,10 +1174,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) * page is already mapped there, we don't have to copy them out * again. */ - pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; - native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL)); - native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], - regs_pte); + regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte); } /*:*/ @@ -1209,7 +1202,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu, /* The first entries are easy: they map the Switcher code. */ for (i = 0; i < pages; i++) { - native_set_pte(&pte[i], mk_pte(switcher_page[i], + set_pte(&pte[i], mk_pte(switcher_page[i], __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } @@ -1217,14 +1210,14 @@ static __init void populate_switcher_pte_page(unsigned int cpu, i = pages + cpu*2; /* First page (Guest registers) is writable from the Guest */ - native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), + set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); /* * The second page contains the "struct lguest_ro_state", and is * read-only. 
*/ - native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), + set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 32266fb89c2022204c4b7fd396b4139c53f69f0d..5c498d2b043f45f59135160e8edd424658c01e18 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -320,7 +321,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) skb_queue_head(&vi->recv, skb); err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb); - if (err) { + if (err < 0) { skb_unlink(skb, &vi->recv); trim_pages(vi, skb); kfree_skb(skb); @@ -373,7 +374,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) skb_queue_head(&vi->recv, skb); err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); - if (err) { + if (err < 0) { skb_unlink(skb, &vi->recv); kfree_skb(skb); break; @@ -527,7 +528,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (!err && !vi->free_in_tasklet) + if (err >= 0 && !vi->free_in_tasklet) mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); return err; @@ -538,7 +539,7 @@ static void xmit_tasklet(unsigned long data) struct virtnet_info *vi = (void *)data; netif_tx_lock_bh(vi->dev); - if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) { + if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) { vi->svq->vq_ops->kick(vi->svq); vi->last_xmit_skb = NULL; } @@ -557,7 +558,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* If we has a buffer left over from last time, send it now. 
*/ if (unlikely(vi->last_xmit_skb) && - xmit_skb(vi, vi->last_xmit_skb) != 0) + xmit_skb(vi, vi->last_xmit_skb) < 0) goto stop_queue; vi->last_xmit_skb = NULL; @@ -565,7 +566,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Put new one in send queue and do transmit */ if (likely(skb)) { __skb_queue_head(&vi->send, skb); - if (xmit_skb(vi, skb) != 0) { + if (xmit_skb(vi, skb) < 0) { vi->last_xmit_skb = skb; skb = NULL; goto stop_queue; @@ -668,7 +669,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, sg_set_buf(&sg[i + 1], sg_virt(s), s->length); sg_set_buf(&sg[out + in - 1], &status, sizeof(status)); - BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi)); + BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0); vi->cvq->vq_ops->kick(vi->cvq); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 26b278264796b31a4b98403d728b1e9ccfd8257b..200c22f55130ba890d37a73a0e565460e8240315 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -19,6 +19,7 @@ */ //#define DEBUG #include +#include #include #include #include @@ -84,7 +85,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) init_completion(&vb->acked); /* We should always be able to add one buffer to an empty queue. */ - if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0) + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0) BUG(); vq->vq_ops->kick(vq); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 248e00ec4dc12fc32cc606eb02d4c67aa79b90d2..4a1f1ebff7bf232ab60f507331b60aa71b3903a3 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -84,7 +84,7 @@ struct virtio_pci_vq_info struct list_head node; /* MSI-X vector (or none) */ - unsigned vector; + unsigned msix_vector; }; /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ @@ -280,25 +280,14 @@ static void vp_free_vectors(struct virtio_device *vdev) vp_dev->msix_entries = NULL; } -static int vp_request_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) +static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned i, v; int err = -ENOMEM; - if (!nvectors) { - /* Can't allocate MSI-X vectors, use regular interrupt */ - vp_dev->msix_vectors = 0; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, - IRQF_SHARED, name, vp_dev); - if (err) - return err; - vp_dev->intx_enabled = 1; - return 0; - } - vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, GFP_KERNEL); if (!vp_dev->msix_entries) @@ -311,6 +300,7 @@ static int vp_request_vectors(struct virtio_device *vdev, int nvectors, for (i = 0; i < nvectors; ++i) vp_dev->msix_entries[i].entry = i; + /* pci_enable_msix returns positive if we can't get this many. 
*/ err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); if (err > 0) err = -ENOSPC; @@ -356,10 +346,22 @@ static int vp_request_vectors(struct virtio_device *vdev, int nvectors, return err; } -static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 vector) +static int vp_request_intx(struct virtio_device *vdev) +{ + int err; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, dev_name(&vdev->dev), vp_dev); + if (!err) + vp_dev->intx_enabled = 1; + return err; +} + +static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; @@ -384,7 +386,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; - info->vector = vector; + info->msix_vector = msix_vec; size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); @@ -408,10 +410,10 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, vq->priv = info; info->vq = vq; - if (vector != VIRTIO_MSI_NO_VECTOR) { - iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - if (vector == VIRTIO_MSI_NO_VECTOR) { + if (msix_vec != VIRTIO_MSI_NO_VECTOR) { + iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto out_assign; } @@ -472,7 +474,8 @@ static void vp_del_vqs(struct virtio_device *vdev) list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vq->priv; if (vp_dev->per_vq_vectors) - 
free_irq(vp_dev->msix_entries[info->vector].vector, vq); + free_irq(vp_dev->msix_entries[info->msix_vector].vector, + vq); vp_del_vq(vq); } vp_dev->per_vq_vectors = false; @@ -484,38 +487,58 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char *names[], - int nvectors, + bool use_msix, bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - u16 vector; - int i, err, allocated_vectors; + u16 msix_vec; + int i, err, nvectors, allocated_vectors; - err = vp_request_vectors(vdev, nvectors, per_vq_vectors); - if (err) - goto error_request; + if (!use_msix) { + /* Old style: one normal interrupt for change and all vqs. */ + err = vp_request_intx(vdev); + if (err) + goto error_request; + } else { + if (per_vq_vectors) { + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. 
*/ + nvectors = 2; + } + + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + if (err) + goto error_request; + } vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!callbacks[i] || !vp_dev->msix_enabled) - vector = VIRTIO_MSI_NO_VECTOR; + msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vp_dev->per_vq_vectors) - vector = allocated_vectors++; + msix_vec = allocated_vectors++; else - vector = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector); + msix_vec = VP_MSIX_VQ_VECTOR; + vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } /* allocate per-vq irq if available and necessary */ - if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) { - snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names, - "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); - err = request_irq(vp_dev->msix_entries[vector].vector, - vring_interrupt, 0, - vp_dev->msix_names[vector], vqs[i]); + if (vp_dev->per_vq_vectors) { + snprintf(vp_dev->msix_names[msix_vec], + sizeof *vp_dev->msix_names, + "%s-%s", + dev_name(&vp_dev->vdev.dev), names[i]); + err = request_irq(msix_vec, vring_interrupt, 0, + vp_dev->msix_names[msix_vec], + vqs[i]); if (err) { vp_del_vq(vqs[i]); goto error_find; @@ -537,28 +560,20 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, vq_callback_t *callbacks[], const char *names[]) { - int vectors = 0; - int i, uninitialized_var(err); - - /* How many vectors would we like? */ - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++vectors; + int err; - /* We want at most one vector per queue and one for config changes. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - vectors + 1, true); + /* Try MSI-X with one vector per queue. 
*/ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); if (!err) return 0; - /* Fallback to separate vectors for config and a shared for queues. */ + /* Fallback: MSI-X with one vector for config, one shared for queues. */ err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - 2, false); + true, false); if (!err) return 0; /* Finally fall back to regular interrupts. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - 0, false); - return err; + return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + false, false); } static struct virtio_config_ops virtio_pci_config_ops = { diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a882f2606515796334e403baa411ebcbe45c592a..f536005807269728f3285f7bbf6f176cd790e5c2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -208,7 +208,11 @@ static int vring_add_buf(struct virtqueue *_vq, pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); - return 0; + + /* If we're indirect, we can fit many (assuming not OOM). */ + if (vq->indirect) + return vq->num_free ? vq->vring.num : 0; + return vq->num_free; } static void vring_kick(struct virtqueue *_vq) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4fca4f5440ba47c93d7f903b8395a7b98791a021..057a2e0107589763411cfeea1bd6e7e4b189a24c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -34,7 +34,7 @@ struct virtqueue { * out_num: the number of sg readable by other side * in_num: the number of sg which are writable (after readable ones) * data: the token identifying the buffer. - * Returns 0 or an error. + * Returns remaining capacity of queue (sg segments) or a negative error. * @kick: update after add_buf * vq: the struct virtqueue * After one or more add_buf calls, invoke this to kick the other side. 
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index b3c4a60ceeb305ffc9a670e57c08a580ab36798c..ea7226a45acbbf46a36684a8596e1734e5f52331 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -4,8 +4,6 @@ * compatible drivers/servers. */ #include -/* The ID for virtio console */ -#define VIRTIO_ID_9P 9 /* Maximum number of virtio channels per partition (1 for now) */ #define MAX_9P_CHAN 1 diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 8726ff77763e47389e0cb61eec678c85228b06c6..09d730085060afc91d3a01d60c9698a274e57555 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -4,9 +4,6 @@ * compatible drivers/servers. */ #include -/* The ID for virtio_balloon */ -#define VIRTIO_ID_BALLOON 5 - /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 8dab9f2b8832db74569f668a70edfd853274414c..15cb666581d7cbdb42cd7d39f4d309ac59399fb0 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -5,9 +5,6 @@ #include #include -/* The ID for virtio_block */ -#define VIRTIO_ID_BLOCK 2 - /* Feature bits */ #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ @@ -17,6 +14,7 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ #define VIRTIO_BLK_F_IDENTIFY 8 /* ATA IDENTIFY supported */ +#define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */ #define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ @@ -38,6 +36,17 @@ struct virtio_blk_config { __u8 identify[VIRTIO_BLK_ID_BYTES]; } __attribute__((packed)); +/* + * Command types + * + * Usage is a bit tricky as some bits are used as flags and some are not. 
+ * + * Rules: + * VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or + * VIRTIO_BLK_T_BARRIER. VIRTIO_BLK_T_FLUSH is a command of its own + * and may not be combined with any of the other flags. + */ + /* These two define direction. */ #define VIRTIO_BLK_T_IN 0 #define VIRTIO_BLK_T_OUT 1 @@ -45,6 +54,9 @@ struct virtio_blk_config { /* This bit says it's a scsi command, not an actual read or write. */ #define VIRTIO_BLK_T_SCSI_CMD 2 +/* Cache flush command */ +#define VIRTIO_BLK_T_FLUSH 4 + /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index dc161115ae35acdcfd3d54732f7f887194d9cbc1..b5f51980601445b79a13493cb8521a1c97692bb3 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,9 +5,6 @@ /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so * anyone can use the definitions to implement compatible drivers/servers. */ -/* The ID for virtio console */ -#define VIRTIO_ID_CONSOLE 3 - /* Feature bits */ #define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h new file mode 100644 index 0000000000000000000000000000000000000000..06660c0a78d720e5d4b2155041c10b3800c9ffbd --- /dev/null +++ b/include/linux/virtio_ids.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_VIRTIO_IDS_H +#define _LINUX_VIRTIO_IDS_H +/* + * Virtio IDs + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. 
+ */ + +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_9P 9 /* virtio 9p transport */ + +#endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d8dd539c9f48e602019c0b184087635c042e3b3d..1f41734bbb777b0147926ed0964a79d84fa6d026 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -6,9 +6,6 @@ #include #include -/* The ID for virtio_net */ -#define VIRTIO_ID_NET 1 - /* The feature bitmap for virtio net */ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 1a85dab8a940c31b66cb1a5a36de381418b20739..48121c3c434b6781661167debbf7b4a7540945fd 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -4,7 +4,4 @@ * compatible drivers/servers. 
*/ #include -/* The ID for virtio_rng */ -#define VIRTIO_ID_RNG 4 - #endif /* _LINUX_VIRTIO_RNG_H */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9bf0b737aa517fde7bee51821be494e743a67cce..b2e07f0dd29844f046fef7acad2e6e1824d33d8b 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #define VIRTQUEUE_NUM 128 @@ -200,7 +201,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) req->status = REQ_STATUS_SENT; - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) { + if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc add_buf returned failure"); return -EIO; @@ -334,8 +335,6 @@ static void p9_virtio_remove(struct virtio_device *vdev) } } -#define VIRTIO_ID_9P 9 - static struct virtio_device_id id_table[] = { { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, { 0 },