提交 8d3d53b3 编写于 作者: A Andrew J. Bennieston 提交者: David S. Miller

xen-netback: Add support for multiple queues

Builds on the refactoring of the previous patch to implement multiple
queues between xen-netfront and xen-netback.

Writes the maximum supported number of queues into XenStore, and reads
the values written by the frontend to determine how many queues to use.

Ring references and event channels are read from XenStore on a per-queue
basis and rings are connected accordingly.

Also adds code to handle the cleanup of any already initialised queues
if the initialisation of a subsequent queue fails.
Signed-off-by: Andrew J. Bennieston <andrew.bennieston@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 e9ce7cb6
...@@ -237,6 +237,7 @@ struct xenvif *xenvif_alloc(struct device *parent, ...@@ -237,6 +237,7 @@ struct xenvif *xenvif_alloc(struct device *parent,
unsigned int handle); unsigned int handle);
int xenvif_init_queue(struct xenvif_queue *queue); int xenvif_init_queue(struct xenvif_queue *queue);
void xenvif_deinit_queue(struct xenvif_queue *queue);
int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int tx_evtchn, unsigned long rx_ring_ref, unsigned int tx_evtchn,
...@@ -299,5 +300,6 @@ extern bool separate_tx_rx_irq; ...@@ -299,5 +300,6 @@ extern bool separate_tx_rx_irq;
extern unsigned int rx_drain_timeout_msecs; extern unsigned int rx_drain_timeout_msecs;
extern unsigned int rx_drain_timeout_jiffies; extern unsigned int rx_drain_timeout_jiffies;
extern unsigned int xenvif_max_queues;
#endif /* __XEN_NETBACK__COMMON_H__ */ #endif /* __XEN_NETBACK__COMMON_H__ */
...@@ -139,7 +139,6 @@ static void xenvif_wake_queue_callback(unsigned long data) ...@@ -139,7 +139,6 @@ static void xenvif_wake_queue_callback(unsigned long data)
static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback) void *accel_priv, select_queue_fallback_t fallback)
{ {
struct xenvif *vif = netdev_priv(dev);
unsigned int num_queues = dev->real_num_tx_queues; unsigned int num_queues = dev->real_num_tx_queues;
u32 hash; u32 hash;
u16 queue_index; u16 queue_index;
...@@ -436,7 +435,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, ...@@ -436,7 +435,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
char name[IFNAMSIZ] = {}; char name[IFNAMSIZ] = {};
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup, 1); /* Allocate a netdev with the max. supported number of queues.
* When the guest selects the desired number, it will be updated
* via netif_set_real_num_tx_queues().
*/
dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup,
xenvif_max_queues);
if (dev == NULL) { if (dev == NULL) {
pr_warn("Could not allocate netdev for %s\n", name); pr_warn("Could not allocate netdev for %s\n", name);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -706,6 +710,16 @@ void xenvif_disconnect(struct xenvif *vif) ...@@ -706,6 +710,16 @@ void xenvif_disconnect(struct xenvif *vif)
} }
} }
/* Reverse the relevant parts of xenvif_init_queue().
 *
 * Hands the MAX_PENDING_REQS entries of queue->mmap_pages to
 * free_xenballooned_pages() and then removes the queue's NAPI
 * instance with netif_napi_del().
 *
 * Used for queue teardown from xenvif_free(), which calls
 * xenvif_wait_unmap_timeout() on the queue first, and on the
 * error handling path in xenbus.c:connect() for a queue whose
 * xenvif_init_queue() succeeded but whose connect_rings() failed.
 */
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
/* Give back the pages tracked in mmap_pages. */
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
/* Remove the NAPI instance embedded in the queue. */
netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif) void xenvif_free(struct xenvif *vif)
{ {
struct xenvif_queue *queue = NULL; struct xenvif_queue *queue = NULL;
...@@ -729,11 +743,8 @@ void xenvif_free(struct xenvif *vif) ...@@ -729,11 +743,8 @@ void xenvif_free(struct xenvif *vif)
for (queue_index = 0; queue_index < num_queues; ++queue_index) { for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index]; queue = &vif->queues[queue_index];
xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime); xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); xenvif_deinit_queue(queue);
netif_napi_del(&queue->napi);
} }
/* Free the array of queues. The call below does not require /* Free the array of queues. The call below does not require
......
...@@ -62,6 +62,11 @@ unsigned int rx_drain_timeout_msecs = 10000; ...@@ -62,6 +62,11 @@ unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444); module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies; unsigned int rx_drain_timeout_jiffies;
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
"Maximum number of queues per virtual interface");
/* /*
* This is the maximum slots a skb can have. If a guest sends a skb * This is the maximum slots a skb can have. If a guest sends a skb
* which exceeds this limit it is considered malicious. * which exceeds this limit it is considered malicious.
...@@ -1953,6 +1958,9 @@ static int __init netback_init(void) ...@@ -1953,6 +1958,9 @@ static int __init netback_init(void)
if (!xen_domain()) if (!xen_domain())
return -ENODEV; return -ENODEV;
/* Allow as many queues as there are CPUs, by default */
xenvif_max_queues = num_online_cpus();
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
......
...@@ -160,6 +160,12 @@ static int netback_probe(struct xenbus_device *dev, ...@@ -160,6 +160,12 @@ static int netback_probe(struct xenbus_device *dev,
if (err) if (err)
pr_debug("Error writing feature-split-event-channels\n"); pr_debug("Error writing feature-split-event-channels\n");
/* Multi-queue support: This is an optional feature. */
err = xenbus_printf(XBT_NIL, dev->nodename,
"multi-queue-max-queues", "%u", xenvif_max_queues);
if (err)
pr_debug("Error writing multi-queue-max-queues\n");
err = xenbus_switch_state(dev, XenbusStateInitWait); err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err) if (err)
goto fail; goto fail;
...@@ -490,9 +496,25 @@ static void connect(struct backend_info *be) ...@@ -490,9 +496,25 @@ static void connect(struct backend_info *be)
struct xenbus_device *dev = be->dev; struct xenbus_device *dev = be->dev;
unsigned long credit_bytes, credit_usec; unsigned long credit_bytes, credit_usec;
unsigned int queue_index; unsigned int queue_index;
unsigned int requested_num_queues = 1; unsigned int requested_num_queues;
struct xenvif_queue *queue; struct xenvif_queue *queue;
/* Check whether the frontend requested multiple queues
* and read the number requested.
*/
err = xenbus_scanf(XBT_NIL, dev->otherend,
"multi-queue-num-queues",
"%u", &requested_num_queues);
if (err < 0) {
requested_num_queues = 1; /* Fall back to single queue */
} else if (requested_num_queues > xenvif_max_queues) {
/* buggy or malicious guest */
xenbus_dev_fatal(dev, err,
"guest requested %u queues, exceeding the maximum of %u.",
requested_num_queues, xenvif_max_queues);
return;
}
err = xen_net_read_mac(dev, be->vif->fe_dev_addr); err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
if (err) { if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
...@@ -502,6 +524,7 @@ static void connect(struct backend_info *be) ...@@ -502,6 +524,7 @@ static void connect(struct backend_info *be)
xen_net_read_rate(dev, &credit_bytes, &credit_usec); xen_net_read_rate(dev, &credit_bytes, &credit_usec);
read_xenbus_vif_flags(be); read_xenbus_vif_flags(be);
/* Use the number of queues requested by the frontend */
be->vif->queues = vzalloc(requested_num_queues * be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue)); sizeof(struct xenvif_queue));
rtnl_lock(); rtnl_lock();
...@@ -516,14 +539,33 @@ static void connect(struct backend_info *be) ...@@ -516,14 +539,33 @@ static void connect(struct backend_info *be)
be->vif->dev->name, queue->id); be->vif->dev->name, queue->id);
err = xenvif_init_queue(queue); err = xenvif_init_queue(queue);
if (err) if (err) {
/* xenvif_init_queue() cleans up after itself on
* failure, but we need to clean up any previously
* initialised queues. Set the real number of TX queues
* to queue_index so that earlier queues can be destroyed
* using the regular disconnect logic.
*/
rtnl_lock();
netif_set_real_num_tx_queues(be->vif->dev, queue_index);
rtnl_unlock();
goto err; goto err;
}
queue->remaining_credit = credit_bytes; queue->remaining_credit = credit_bytes;
err = connect_rings(be, queue); err = connect_rings(be, queue);
if (err) if (err) {
/* connect_rings() cleans up after itself on failure,
* but we need to clean up after xenvif_init_queue() here,
* and also clean up any previously initialised queues.
*/
xenvif_deinit_queue(queue);
rtnl_lock();
netif_set_real_num_tx_queues(be->vif->dev, queue_index);
rtnl_unlock();
goto err; goto err;
}
} }
xenvif_carrier_on(be->vif); xenvif_carrier_on(be->vif);
...@@ -540,6 +582,8 @@ static void connect(struct backend_info *be) ...@@ -540,6 +582,8 @@ static void connect(struct backend_info *be)
return; return;
err: err:
if (be->vif->dev->real_num_tx_queues > 0)
xenvif_disconnect(be->vif); /* Clean up existing queues */
vfree(be->vif->queues); vfree(be->vif->queues);
be->vif->queues = NULL; be->vif->queues = NULL;
rtnl_lock(); rtnl_lock();
...@@ -552,32 +596,62 @@ static void connect(struct backend_info *be) ...@@ -552,32 +596,62 @@ static void connect(struct backend_info *be)
static int connect_rings(struct backend_info *be, struct xenvif_queue *queue) static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
{ {
struct xenbus_device *dev = be->dev; struct xenbus_device *dev = be->dev;
unsigned int num_queues = queue->vif->dev->real_num_tx_queues;
unsigned long tx_ring_ref, rx_ring_ref; unsigned long tx_ring_ref, rx_ring_ref;
unsigned int tx_evtchn, rx_evtchn; unsigned int tx_evtchn, rx_evtchn;
int err; int err;
char *xspath;
size_t xspathsize;
const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
/* If the frontend requested 1 queue, or we have fallen back
* to single queue due to lack of frontend support for multi-
* queue, expect the remaining XenStore keys in the toplevel
* directory. Otherwise, expect them in a subdirectory called
* queue-N.
*/
if (num_queues == 1) {
xspath = kzalloc(strlen(dev->otherend) + 1, GFP_KERNEL);
if (!xspath) {
xenbus_dev_fatal(dev, -ENOMEM,
"reading ring references");
return -ENOMEM;
}
strcpy(xspath, dev->otherend);
} else {
xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
xspath = kzalloc(xspathsize, GFP_KERNEL);
if (!xspath) {
xenbus_dev_fatal(dev, -ENOMEM,
"reading ring references");
return -ENOMEM;
}
snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend,
queue->id);
}
err = xenbus_gather(XBT_NIL, dev->otherend, err = xenbus_gather(XBT_NIL, xspath,
"tx-ring-ref", "%lu", &tx_ring_ref, "tx-ring-ref", "%lu", &tx_ring_ref,
"rx-ring-ref", "%lu", &rx_ring_ref, NULL); "rx-ring-ref", "%lu", &rx_ring_ref, NULL);
if (err) { if (err) {
xenbus_dev_fatal(dev, err, xenbus_dev_fatal(dev, err,
"reading %s/ring-ref", "reading %s/ring-ref",
dev->otherend); xspath);
return err; goto err;
} }
/* Try split event channels first, then single event channel. */ /* Try split event channels first, then single event channel. */
err = xenbus_gather(XBT_NIL, dev->otherend, err = xenbus_gather(XBT_NIL, xspath,
"event-channel-tx", "%u", &tx_evtchn, "event-channel-tx", "%u", &tx_evtchn,
"event-channel-rx", "%u", &rx_evtchn, NULL); "event-channel-rx", "%u", &rx_evtchn, NULL);
if (err < 0) { if (err < 0) {
err = xenbus_scanf(XBT_NIL, dev->otherend, err = xenbus_scanf(XBT_NIL, xspath,
"event-channel", "%u", &tx_evtchn); "event-channel", "%u", &tx_evtchn);
if (err < 0) { if (err < 0) {
xenbus_dev_fatal(dev, err, xenbus_dev_fatal(dev, err,
"reading %s/event-channel(-tx/rx)", "reading %s/event-channel(-tx/rx)",
dev->otherend); xspath);
return err; goto err;
} }
rx_evtchn = tx_evtchn; rx_evtchn = tx_evtchn;
} }
...@@ -590,10 +664,13 @@ static int connect_rings(struct backend_info *be, struct xenvif_queue *queue) ...@@ -590,10 +664,13 @@ static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
"mapping shared-frames %lu/%lu port tx %u rx %u", "mapping shared-frames %lu/%lu port tx %u rx %u",
tx_ring_ref, rx_ring_ref, tx_ring_ref, rx_ring_ref,
tx_evtchn, rx_evtchn); tx_evtchn, rx_evtchn);
return err; goto err;
} }
return 0; err = 0;
err: /* Regular return falls through with err == 0 */
kfree(xspath);
return err;
} }
static int read_xenbus_vif_flags(struct backend_info *be) static int read_xenbus_vif_flags(struct backend_info *be)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册