Commit 038a75af authored by Jens Axboe

Merge branch 'stable/for-jens-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-4.5/drivers

Konrad writes:

The pull is based on converting the backend driver into a multiqueue
driver and exposing more than one queue to the frontend. As such we had
to modify the frontend and also fix a bunch of bugs around this.

The original work is based on Arianna Avanzini's work as an OPW intern.
Bob took over the work and had been massaging it for quite some time.

Also included are 64KB page support for ARM and various bug fixes.
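(For orientation: the negotiation this series implements is a small xenstore
handshake. The backend advertises the maximum number of queues it supports,
and the frontend writes back how many it wants. A minimal sketch of the
backend side, using the same xenbus_printf()/xenbus_scanf() calls as the diff
below; the surrounding code and error handling are elided, and `dev` stands
for the backend's xenbus_device:

	/* Sketch: advertise our limit (see xen_blkbk_probe() below). */
	err = xenbus_printf(XBT_NIL, dev->nodename,
			    "multi-queue-max-queues", "%u", xenblk_max_queues);

	/* Sketch: read what the frontend asked for (see connect_ring() below). */
	err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
			   "%u", &requested_num_queues);
	if (err < 0)
		requested_num_queues = 1;	/* legacy single-queue frontend */
)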
[diff collapsed]
@@ -46,6 +46,7 @@
 #include <xen/interface/io/protocols.h>
 
 extern unsigned int xen_blkif_max_ring_order;
+extern unsigned int xenblk_max_queues;
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.
@@ -269,68 +270,79 @@ struct persistent_gnt {
 	struct list_head remove_node;
 };
 
-struct xen_blkif {
-	/* Unique identifier for this interface. */
-	domid_t			domid;
-	unsigned int		handle;
+/* Per-ring information. */
+struct xen_blkif_ring {
 	/* Physical parameters of the comms window. */
 	unsigned int		irq;
-	/* Comms information. */
-	enum blkif_protocol	blk_protocol;
 	union blkif_back_rings	blk_rings;
 	void			*blk_ring;
-	/* The VBD attached to this interface. */
-	struct xen_vbd		vbd;
-	/* Back pointer to the backend_info. */
-	struct backend_info	*be;
 	/* Private fields. */
 	spinlock_t		blk_ring_lock;
-	atomic_t		refcnt;
 
 	wait_queue_head_t	wq;
-	/* for barrier (drain) requests */
-	struct completion	drain_complete;
-	atomic_t		drain;
 	atomic_t		inflight;
-	/* One thread per one blkif. */
+	/* One thread per blkif ring. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
-	/* tree to store persistent grants */
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
+
+	/* Tree to store persistent grants. */
+	spinlock_t		pers_gnts_lock;
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
 	atomic_t		persistent_gnt_in_use;
 	unsigned long		next_lru;
 
-	/* used by the kworker that offload work from the persistent purge */
+	/* Statistics. */
+	unsigned long		st_print;
+	unsigned long long	st_rd_req;
+	unsigned long long	st_wr_req;
+	unsigned long long	st_oo_req;
+	unsigned long long	st_f_req;
+	unsigned long long	st_ds_req;
+	unsigned long long	st_rd_sect;
+	unsigned long long	st_wr_sect;
+
+	/* Used by the kworker that offload work from the persistent purge. */
 	struct list_head	persistent_purge_list;
 	struct work_struct	persistent_purge_work;
 
-	/* buffer of free pages to map grant refs */
+	/* Buffer of free pages to map grant refs. */
 	spinlock_t		free_pages_lock;
 	int			free_pages_num;
 	struct list_head	free_pages;
 
-	/* List of all 'pending_req' available */
-	struct list_head	pending_free;
-	/* And its spinlock. */
-	spinlock_t		pending_free_lock;
-	wait_queue_head_t	pending_free_wq;
-
-	/* statistics */
-	unsigned long		st_print;
-	unsigned long long	st_rd_req;
-	unsigned long long	st_wr_req;
-	unsigned long long	st_oo_req;
-	unsigned long long	st_f_req;
-	unsigned long long	st_ds_req;
-	unsigned long long	st_rd_sect;
-	unsigned long long	st_wr_sect;
-
 	struct work_struct	free_work;
 	/* Thread shutdown wait queue. */
 	wait_queue_head_t	shutdown_wq;
-	unsigned int		nr_ring_pages;
+	struct xen_blkif	*blkif;
+};
+
+struct xen_blkif {
+	/* Unique identifier for this interface. */
+	domid_t			domid;
+	unsigned int		handle;
+	/* Comms information. */
+	enum blkif_protocol	blk_protocol;
+	/* The VBD attached to this interface. */
+	struct xen_vbd		vbd;
+	/* Back pointer to the backend_info. */
+	struct backend_info	*be;
+	atomic_t		refcnt;
+	/* for barrier (drain) requests */
+	struct completion	drain_complete;
+	atomic_t		drain;
+
+	struct work_struct	free_work;
+	unsigned int		nr_ring_pages;
+	/* All rings for this device. */
+	struct xen_blkif_ring	*rings;
+	unsigned int		nr_rings;
 };
 
 struct seg_buf {
@@ -352,7 +364,7 @@ struct grant_page {
  * response queued for it, with the saved 'id' passed back.
  */
 struct pending_req {
-	struct xen_blkif	*blkif;
+	struct xen_blkif_ring	*ring;
 	u64			id;
 	int			nr_segs;
 	atomic_t		pendcnt;
@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
-void xen_blkbk_free_caches(struct xen_blkif *blkif);
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
......
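(The practical effect of the common.h split above: hot-path code now takes a
struct xen_blkif_ring and reaches device-wide state through the new blkif back
pointer. A hypothetical helper, not part of the patch, purely to illustrate
the new field layout:

	/* Hypothetical example: accounting a completed read on one ring. */
	static void example_account_read(struct pending_req *req, int nr_sects)
	{
		struct xen_blkif_ring *ring = req->ring;	/* was req->blkif */
		struct xen_blkif *blkif = ring->blkif;		/* device-wide state */

		ring->st_rd_req++;			/* statistics are per-ring now */
		ring->st_rd_sect += nr_sects;
		pr_debug("blkback: read completed for domid %u\n", blkif->domid);
	}
)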
@@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 {
 	int err;
 	char name[BLKBACK_NAME_LEN];
+	struct xen_blkif_ring *ring;
+	int i;
 
 	/* Not ready to connect? */
-	if (!blkif->irq || !blkif->vbd.bdev)
+	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
 		return;
 
 	/* Already connected? */
@@ -113,13 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 	}
 	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
 
-	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
-	if (IS_ERR(blkif->xenblkd)) {
-		err = PTR_ERR(blkif->xenblkd);
-		blkif->xenblkd = NULL;
-		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
-		return;
+	for (i = 0; i < blkif->nr_rings; i++) {
+		ring = &blkif->rings[i];
+		ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
+		if (IS_ERR(ring->xenblkd)) {
+			err = PTR_ERR(ring->xenblkd);
+			ring->xenblkd = NULL;
+			xenbus_dev_fatal(blkif->be->dev, err,
+					"start %s-%d xenblkd", name, i);
+			goto out;
+		}
+	}
+	return;
+
+out:
+	while (--i >= 0) {
+		ring = &blkif->rings[i];
+		kthread_stop(ring->xenblkd);
 	}
+	return;
+}
+
+static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
+{
+	unsigned int r;
+
+	blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
+	if (!blkif->rings)
+		return -ENOMEM;
+
+	for (r = 0; r < blkif->nr_rings; r++) {
+		struct xen_blkif_ring *ring = &blkif->rings[r];
+
+		spin_lock_init(&ring->blk_ring_lock);
+		init_waitqueue_head(&ring->wq);
+		INIT_LIST_HEAD(&ring->pending_free);
+		INIT_LIST_HEAD(&ring->persistent_purge_list);
+		INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+		spin_lock_init(&ring->free_pages_lock);
+		INIT_LIST_HEAD(&ring->free_pages);
+		spin_lock_init(&ring->pending_free_lock);
+		init_waitqueue_head(&ring->pending_free_wq);
+		init_waitqueue_head(&ring->shutdown_wq);
+		ring->blkif = blkif;
+		ring->st_print = jiffies;
+		xen_blkif_get(blkif);
+	}
+
+	return 0;
 }
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
@@ -133,41 +177,25 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 		return ERR_PTR(-ENOMEM);
 
 	blkif->domid = domid;
-	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
-	init_waitqueue_head(&blkif->wq);
 	init_completion(&blkif->drain_complete);
-	atomic_set(&blkif->drain, 0);
-	blkif->st_print = jiffies;
-	blkif->persistent_gnts.rb_node = NULL;
-	spin_lock_init(&blkif->free_pages_lock);
-	INIT_LIST_HEAD(&blkif->free_pages);
-	INIT_LIST_HEAD(&blkif->persistent_purge_list);
-	blkif->free_pages_num = 0;
-	atomic_set(&blkif->persistent_gnt_in_use, 0);
-	atomic_set(&blkif->inflight, 0);
-	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
-
-	INIT_LIST_HEAD(&blkif->pending_free);
 	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-	spin_lock_init(&blkif->pending_free_lock);
-	init_waitqueue_head(&blkif->pending_free_wq);
-	init_waitqueue_head(&blkif->shutdown_wq);
 
 	return blkif;
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 			 unsigned int nr_grefs, unsigned int evtchn)
 {
 	int err;
+	struct xen_blkif *blkif = ring->blkif;
 
 	/* Already connected through? */
-	if (blkif->irq)
+	if (ring->irq)
 		return 0;
 
 	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
-				     &blkif->blk_ring);
+				     &ring->blk_ring);
 	if (err < 0)
 		return err;
@@ -175,24 +203,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 	case BLKIF_PROTOCOL_NATIVE:
 	{
 		struct blkif_sring *sring;
-		sring = (struct blkif_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.native, sring,
+		sring = (struct blkif_sring *)ring->blk_ring;
+		BACK_RING_INIT(&ring->blk_rings.native, sring,
 			       XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_32:
 	{
 		struct blkif_x86_32_sring *sring_x86_32;
-		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
+		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
 			       XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_64:
 	{
 		struct blkif_x86_64_sring *sring_x86_64;
-		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
+		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
 			       XEN_PAGE_SIZE * nr_grefs);
 		break;
 	}
@@ -202,13 +230,13 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 
 	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
 						    xen_blkif_be_int, 0,
-						    "blkif-backend", blkif);
+						    "blkif-backend", ring);
 	if (err < 0) {
-		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
-		blkif->blk_rings.common.sring = NULL;
+		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+		ring->blk_rings.common.sring = NULL;
 		return err;
 	}
-	blkif->irq = err;
+	ring->irq = err;
 
 	return 0;
 }
@@ -216,50 +244,69 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
 static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	struct pending_req *req, *n;
-	int i = 0, j;
+	unsigned int j, r;
 
-	if (blkif->xenblkd) {
-		kthread_stop(blkif->xenblkd);
-		wake_up(&blkif->shutdown_wq);
-		blkif->xenblkd = NULL;
-	}
+	for (r = 0; r < blkif->nr_rings; r++) {
+		struct xen_blkif_ring *ring = &blkif->rings[r];
+		unsigned int i = 0;
 
-	/* The above kthread_stop() guarantees that at this point we
-	 * don't have any discard_io or other_io requests. So, checking
-	 * for inflight IO is enough.
-	 */
-	if (atomic_read(&blkif->inflight) > 0)
-		return -EBUSY;
+		if (ring->xenblkd) {
+			kthread_stop(ring->xenblkd);
+			wake_up(&ring->shutdown_wq);
+			ring->xenblkd = NULL;
+		}
 
-	if (blkif->irq) {
-		unbind_from_irqhandler(blkif->irq, blkif);
-		blkif->irq = 0;
-	}
+		/* The above kthread_stop() guarantees that at this point we
+		 * don't have any discard_io or other_io requests. So, checking
+		 * for inflight IO is enough.
+		 */
+		if (atomic_read(&ring->inflight) > 0)
+			return -EBUSY;
 
-	if (blkif->blk_rings.common.sring) {
-		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
-		blkif->blk_rings.common.sring = NULL;
-	}
+		if (ring->irq) {
+			unbind_from_irqhandler(ring->irq, ring);
+			ring->irq = 0;
+		}
 
-	/* Remove all persistent grants and the cache of ballooned pages. */
-	xen_blkbk_free_caches(blkif);
+		if (ring->blk_rings.common.sring) {
+			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+			ring->blk_rings.common.sring = NULL;
+		}
 
-	/* Check that there is no request in use */
-	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
-		list_del(&req->free_list);
+		/* Remove all persistent grants and the cache of ballooned pages. */
+		xen_blkbk_free_caches(ring);
 
-		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
-			kfree(req->segments[j]);
+		/* Check that there is no request in use */
+		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
+			list_del(&req->free_list);
 
-		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
-			kfree(req->indirect_pages[j]);
+			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+				kfree(req->segments[j]);
 
-		kfree(req);
-		i++;
-	}
+			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+				kfree(req->indirect_pages[j]);
 
-	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+			kfree(req);
+			i++;
+		}
+
+		BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+		BUG_ON(!list_empty(&ring->persistent_purge_list));
+		BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+		BUG_ON(!list_empty(&ring->free_pages));
+		BUG_ON(ring->free_pages_num != 0);
+		BUG_ON(ring->persistent_gnt_c != 0);
+		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+		xen_blkif_put(blkif);
+	}
 	blkif->nr_ring_pages = 0;
+	/*
+	 * blkif->rings was allocated in connect_ring, so we should free it in
+	 * here.
+	 */
+	kfree(blkif->rings);
+	blkif->rings = NULL;
+	blkif->nr_rings = 0;
 
 	return 0;
 }
@@ -271,13 +318,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 		xen_vbd_free(&blkif->vbd);
 
 	/* Make sure everything is drained before shutting down */
-	BUG_ON(blkif->persistent_gnt_c != 0);
-	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
-	BUG_ON(blkif->free_pages_num != 0);
-	BUG_ON(!list_empty(&blkif->persistent_purge_list));
-	BUG_ON(!list_empty(&blkif->free_pages));
-	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
@@ -296,25 +336,38 @@ int __init xen_blkif_interface_init(void)
  * sysfs interface for VBD I/O requests
  */
 
-#define VBD_SHOW(name, format, args...)					\
+#define VBD_SHOW_ALLRING(name, format)					\
 	static ssize_t show_##name(struct device *_dev,			\
 				   struct device_attribute *attr,	\
 				   char *buf)				\
 	{								\
 		struct xenbus_device *dev = to_xenbus_device(_dev);	\
 		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
+		struct xen_blkif *blkif = be->blkif;			\
+		unsigned int i;						\
+		unsigned long long result = 0;				\
 									\
-		return sprintf(buf, format, ##args);			\
+		if (!blkif->rings)					\
+			goto out;					\
+									\
+		for (i = 0; i < blkif->nr_rings; i++) {			\
+			struct xen_blkif_ring *ring = &blkif->rings[i];	\
+									\
+			result += ring->st_##name;			\
+		}							\
+									\
+out:									\
+		return sprintf(buf, format, result);			\
 	}								\
 	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
-VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
-VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
-VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
-VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
-VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
-VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
-VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
+VBD_SHOW_ALLRING(oo_req,  "%llu\n");
+VBD_SHOW_ALLRING(rd_req,  "%llu\n");
+VBD_SHOW_ALLRING(wr_req,  "%llu\n");
+VBD_SHOW_ALLRING(f_req,  "%llu\n");
+VBD_SHOW_ALLRING(ds_req,  "%llu\n");
+VBD_SHOW_ALLRING(rd_sect, "%llu\n");
+VBD_SHOW_ALLRING(wr_sect, "%llu\n");
 
 static struct attribute *xen_vbdstat_attrs[] = {
 	&dev_attr_oo_req.attr,
@@ -332,6 +385,18 @@ static struct attribute_group xen_vbdstat_group = {
 	.attrs = xen_vbdstat_attrs,
 };
 
+#define VBD_SHOW(name, format, args...)					\
+	static ssize_t show_##name(struct device *_dev,			\
+				   struct device_attribute *attr,	\
+				   char *buf)				\
+	{								\
+		struct xenbus_device *dev = to_xenbus_device(_dev);	\
+		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
+									\
+		return sprintf(buf, format, ##args);			\
+	}								\
+	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
 VBD_SHOW(mode, "%s\n", be->mode);
@@ -440,11 +505,11 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 
 	dev_set_drvdata(&dev->dev, NULL);
 
-	if (be->blkif) {
+	if (be->blkif)
 		xen_blkif_disconnect(be->blkif);
-		xen_blkif_put(be->blkif);
-	}
 
+	/* Put the reference we set in xen_blkif_alloc(). */
+	xen_blkif_put(be->blkif);
 	kfree(be->mode);
 	kfree(be);
 	return 0;
@@ -553,6 +618,12 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
 		goto fail;
 	}
 
+	/* Multi-queue: advertise how many queues are supported by us.*/
+	err = xenbus_printf(XBT_NIL, dev->nodename,
+			    "multi-queue-max-queues", "%u", xenblk_max_queues);
+	if (err)
+		pr_warn("Error writing multi-queue-max-queues\n");
+
 	/* setup back pointer */
 	be->blkif->be = be;
@@ -708,8 +779,14 @@ static void frontend_changed(struct xenbus_device *dev,
 		}
 
 		err = connect_ring(be);
-		if (err)
+		if (err) {
+			/*
+			 * Clean up so that memory resources can be used by
+			 * other devices. connect_ring reported already error.
+			 */
+			xen_blkif_disconnect(be->blkif);
 			break;
+		}
 		xen_update_blkif_status(be->blkif);
 		break;
@@ -825,50 +902,43 @@ static void connect(struct backend_info *be)
 	xenbus_transaction_end(xbt, 1);
 }
 
-
-static int connect_ring(struct backend_info *be)
+/*
+ * Each ring may have multi pages, depends on "ring-page-order".
+ */
+static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
 {
-	struct xenbus_device *dev = be->dev;
 	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
-	unsigned int evtchn, nr_grefs, ring_page_order;
-	unsigned int pers_grants;
-	char protocol[64] = "";
 	struct pending_req *req, *n;
 	int err, i, j;
+	struct xen_blkif *blkif = ring->blkif;
+	struct xenbus_device *dev = blkif->be->dev;
+	unsigned int ring_page_order, nr_grefs, evtchn;
 
-	pr_debug("%s %s\n", __func__, dev->otherend);
-
-	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+	err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
 			  &evtchn);
 	if (err != 1) {
 		err = -EINVAL;
-		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
-				 dev->otherend);
+		xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
 		return err;
 	}
-	pr_info("event-channel %u\n", evtchn);
 
 	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
 			  &ring_page_order);
 	if (err != 1) {
-		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
-				   "%u", &ring_ref[0]);
+		err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
 		if (err != 1) {
 			err = -EINVAL;
-			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
-					 dev->otherend);
+			xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
 			return err;
 		}
 		nr_grefs = 1;
-		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
-			ring_ref[0]);
 	} else {
 		unsigned int i;
 
 		if (ring_page_order > xen_blkif_max_ring_order) {
 			err = -EINVAL;
 			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
-					 dev->otherend, ring_page_order,
+					 dir, ring_page_order,
 					 xen_blkif_max_ring_order);
 			return err;
 		}
@@ -878,52 +948,23 @@ static int connect_ring(struct backend_info *be)
 			char ring_ref_name[RINGREF_NAME_LEN];
 
 			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
-			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+			err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
 					   "%u", &ring_ref[i]);
 			if (err != 1) {
 				err = -EINVAL;
 				xenbus_dev_fatal(dev, err, "reading %s/%s",
-						 dev->otherend, ring_ref_name);
+						 dir, ring_ref_name);
 				return err;
 			}
-			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
 		}
 	}
-
-	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
-	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
-			    "%63s", protocol, NULL);
-	if (err)
-		strcpy(protocol, "unspecified, assuming default");
-	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
-		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
-	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
-		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
-	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
-		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
-	else {
-		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
-		return -1;
-	}
-	err = xenbus_gather(XBT_NIL, dev->otherend,
-			    "feature-persistent", "%u",
-			    &pers_grants, NULL);
-	if (err)
-		pers_grants = 0;
-
-	be->blkif->vbd.feature_gnt_persistent = pers_grants;
-	be->blkif->vbd.overflow_max_grants = 0;
-	be->blkif->nr_ring_pages = nr_grefs;
-
-	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
-		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
-		pers_grants ? "persistent grants" : "");
+	blkif->nr_ring_pages = nr_grefs;
 
 	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
 		if (!req)
 			goto fail;
-		list_add_tail(&req->free_list, &be->blkif->pending_free);
+		list_add_tail(&req->free_list, &ring->pending_free);
 		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
 			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
 			if (!req->segments[j])
@@ -938,7 +979,7 @@ static int connect_ring(struct backend_info *be)
 	}
 
 	/* Map the shared frame, irq etc. */
-	err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
+	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
 	if (err) {
 		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
 		return err;
@@ -947,7 +988,7 @@ static int connect_ring(struct backend_info *be)
 	return 0;
 
 fail:
-	list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
 		list_del(&req->free_list);
 		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
 			if (!req->segments[j])
@@ -962,6 +1003,93 @@ static int connect_ring(struct backend_info *be)
 		kfree(req);
 	}
 	return -ENOMEM;
+}
+
+static int connect_ring(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	unsigned int pers_grants;
+	char protocol[64] = "";
+	int err, i;
+	char *xspath;
+	size_t xspathsize;
+	const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
+	unsigned int requested_num_queues = 0;
+
+	pr_debug("%s %s\n", __func__, dev->otherend);
+
+	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+			    "%63s", protocol, NULL);
+	if (err)
+		strcpy(protocol, "unspecified, assuming default");
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+	else {
+		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+		return -ENOSYS;
+	}
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	/*
+	 * Read the number of hardware queues from frontend.
+	 */
+	err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
+			   "%u", &requested_num_queues);
+	if (err < 0) {
+		requested_num_queues = 1;
+	} else {
+		if (requested_num_queues > xenblk_max_queues
+		    || requested_num_queues == 0) {
+			/* Buggy or malicious guest. */
+			xenbus_dev_fatal(dev, err,
+					"guest requested %u queues, exceeding the maximum of %u.",
+					requested_num_queues, xenblk_max_queues);
+			return -ENOSYS;
+		}
+	}
+	be->blkif->nr_rings = requested_num_queues;
+	if (xen_blkif_alloc_rings(be->blkif))
+		return -ENOMEM;
+
+	pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
+		 be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+		 pers_grants ? "persistent grants" : "");
+
+	if (be->blkif->nr_rings == 1)
+		return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+	else {
+		xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
+		xspath = kmalloc(xspathsize, GFP_KERNEL);
+		if (!xspath) {
+			xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < be->blkif->nr_rings; i++) {
+			memset(xspath, 0, xspathsize);
+			snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
+
+			err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+			if (err) {
+				kfree(xspath);
+				return err;
+			}
+		}
+		kfree(xspath);
+	}
+	return 0;
 }
 
 static const struct xenbus_device_id xen_blkbk_ids[] = {
......
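(With the VBD_SHOW_ALLRING change above, each sysfs statistics file now
reports the total across all rings. A quick user-space sanity check; the
device name vbd-1-51712 is only an example of the vbd-<domid>-<devid>
pattern, and "statistics" is assumed to be the attribute group name this
file registers:

	#include <stdio.h>

	int main(void)
	{
		const char *path =	/* example path; adjust domid/devid */
			"/sys/bus/xen-backend/devices/vbd-1-51712/statistics/rd_req";
		char buf[32];
		FILE *f = fopen(path, "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("rd_req across all rings: %s", buf);
		fclose(f);
		return 0;
	}
)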
[diff collapsed]
@@ -27,6 +27,54 @@
 typedef uint16_t blkif_vdev_t;
 typedef uint64_t blkif_sector_t;
 
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * the multi-page ring buffer feature.
+ * For example, a frontend requesting two queues/rings, where each ring
+ * buffer is two pages, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
 /*
  * REQUEST CODES.
  */
......
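(To make the layout documented above concrete, here is a hypothetical
frontend-side sketch that writes the per-queue keys inside a xenbus
transaction. The function name and parameters are illustrative, not actual
blkfront code; it assumes single-page rings, hence the plain "ring-ref" key:

	static int example_write_queue_keys(struct xenbus_transaction xbt,
					    struct xenbus_device *dev,
					    unsigned int nr_queues,
					    const grant_ref_t *ring_ref,
					    const unsigned int *evtchn)
	{
		char node[32];	/* "queue-NNN/event-channel" fits */
		unsigned int i;
		int err;

		err = xenbus_printf(xbt, dev->nodename,
				    "multi-queue-num-queues", "%u", nr_queues);
		if (err)
			return err;

		for (i = 0; i < nr_queues; i++) {
			snprintf(node, sizeof(node), "queue-%u/ring-ref", i);
			err = xenbus_printf(xbt, dev->nodename, node,
					    "%u", ring_ref[i]);
			if (err)
				return err;

			snprintf(node, sizeof(node), "queue-%u/event-channel", i);
			err = xenbus_printf(xbt, dev->nodename, node,
					    "%u", evtchn[i]);
			if (err)
				return err;
		}
		return 0;
	}
)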