Merge branch 'xen-netback-next'

Zoltan Kiss says: ==================== xen-netback: Changes around carrier handling This series starts using carrier off as a way to purge packets when the guest is not able (or willing) to receive them. It is a much faster way to get rid of packets waiting for an overwhelmed guest. The first patch changes the current netback code where it currently relies on netif_carrier_ok. The second turns off the carrier if the guest times out on a queue, and only turns it on again if that queue (or queues) resurrects. ==================== Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com> Signed-off-by: David Vrabel <david.vrabel@citrix.com> Signed-off-by: David S. Miller <davem@davemloft.net>

Merge branch 'xen-netback-next'
Zoltan Kiss says: ==================== xen-netback: Changes around carrier handling This series starts using carrier off as a way to purge packets when the guest is not able (or willing) to receive them. It is a much faster way to get rid of packets waiting for an overwhelmed guest. The first patch changes the current netback code where it currently relies on netif_carrier_ok. The second turns off the carrier if the guest times out on a queue, and only turns it on again if that queue (or queues) resurrects. ==================== Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com> Signed-off-by: David Vrabel <david.vrabel@citrix.com> Signed-off-by: David S. Miller <davem@davemloft.net>
61675fea · David S. Miller · aef4f5b6 · f34a4cf9 · 61675fea · 61675fea
3 changed files
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
 	RING_IDX rx_last_skb_slots;
-	bool rx_queue_purge;
+	unsigned long status;

-	struct timer_list wake_queue;
+	struct timer_list rx_stalled;

 	struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];

@@ -198,6 +198,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	struct xenvif_stats stats;
 };

+enum state_bit_shift {
+	/* This bit marks that the vif is connected */
+	VIF_STATUS_CONNECTED,
+	/* This bit signals the RX thread that queuing was stopped (in
+	 * start_xmit), and either the timer fired or an RX interrupt came
+	 */
+	QUEUE_STATUS_RX_PURGE_EVENT,
+	/* This bit tells the interrupt handler that this queue was the reason
+	 * for the carrier off, so it should kick the thread. Only queues which
+	 * brought it down can turn on the carrier.
+	 */
+	QUEUE_STATUS_RX_STALLED
+};
+
 struct xenvif {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
@@ -220,6 +234,7 @@ struct xenvif {
 	 * frontend is rogue.
 	 */
 	bool disabled;
+	unsigned long status;

 	/* Queues */
 	struct xenvif_queue *queues;

--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -55,7 +55,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue)

 int xenvif_schedulable(struct xenvif *vif)
 {
-	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
+	return netif_running(vif->dev) &&
+		test_bit(VIF_STATUS_CONNECTED, &vif->status);
 }

 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -77,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget)
 	/* This vif is rogue, we pretend we've there is nothing to do
 	 * for this vif to deschedule it from NAPI. But this interface
 	 * will be turned off in thread context later.
+	 * Also, if a guest doesn't post enough slots to receive data on one of
+	 * its queues, the carrier goes down and NAPI is descheduled here so
+	 * the guest can't send more packets until it's ready to receive.
 	 */
-	if (unlikely(queue->vif->disabled)) {
+	if (unlikely(queue->vif->disabled ||
+		     !netif_carrier_ok(queue->vif->dev))) {
 		napi_complete(napi);
 		return 0;
 	}
@@ -96,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget)
 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
 	struct xenvif_queue *queue = dev_id;
+	struct netdev_queue *net_queue =
+		netdev_get_tx_queue(queue->vif->dev, queue->id);

+	/* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
+	 * the carrier went down and this queue was previously blocked
+	 */
+	if (unlikely(netif_tx_queue_stopped(net_queue) ||
+		     (!netif_carrier_ok(queue->vif->dev) &&
+		      test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
+		set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
 	xenvif_kick_thread(queue);

 	return IRQ_HANDLED;
@@ -124,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
 }

-/* Callback to wake the queue and drain it on timeout */
-static void xenvif_wake_queue_callback(unsigned long data)
+/* Callback to wake the queue's thread and turn the carrier off on timeout */
+static void xenvif_rx_stalled(unsigned long data)
 {
 	struct xenvif_queue *queue = (struct xenvif_queue *)data;

 	if (xenvif_queue_stopped(queue)) {
-		netdev_err(queue->vif->dev, "draining TX queue\n");
-		queue->rx_queue_purge = true;
+		set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
 		xenvif_kick_thread(queue);
-		xenvif_wake_queue(queue);
 	}
 }

@@ -182,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * drain.
 	 */
 	if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
-		queue->wake_queue.function = xenvif_wake_queue_callback;
-		queue->wake_queue.data = (unsigned long)queue;
+		queue->rx_stalled.function = xenvif_rx_stalled;
+		queue->rx_stalled.data = (unsigned long)queue;
 		xenvif_stop_queue(queue);
-		mod_timer(&queue->wake_queue,
-			jiffies + rx_drain_timeout_jiffies);
+		mod_timer(&queue->rx_stalled,
+			  jiffies + rx_drain_timeout_jiffies);
 	}

 	skb_queue_tail(&queue->rx_queue, skb);
@@ -267,7 +279,7 @@ static void xenvif_down(struct xenvif *vif)
 static int xenvif_open(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
-	if (netif_carrier_ok(dev))
+	if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
 		xenvif_up(vif);
 	netif_tx_start_all_queues(dev);
 	return 0;
@@ -276,7 +288,7 @@ static int xenvif_open(struct net_device *dev)
 static int xenvif_close(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
-	if (netif_carrier_ok(dev))
+	if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
 		xenvif_down(vif);
 	netif_tx_stop_all_queues(dev);
 	return 0;
@@ -514,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
 		queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
 	}

-	init_timer(&queue->wake_queue);
+	init_timer(&queue->rx_stalled);

 	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
 			XENVIF_NAPI_WEIGHT);
@@ -528,6 +540,7 @@ void xenvif_carrier_on(struct xenvif *vif)
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
 	netdev_update_features(vif->dev);
+	set_bit(VIF_STATUS_CONNECTED, &vif->status);
 	netif_carrier_on(vif->dev);
 	if (netif_running(vif->dev))
 		xenvif_up(vif);
@@ -625,9 +638,11 @@ void xenvif_carrier_off(struct xenvif *vif)
 	struct net_device *dev = vif->dev;

 	rtnl_lock();
-	netif_carrier_off(dev); /* discard queued packets */
-	if (netif_running(dev))
-		xenvif_down(vif);
+	if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
+		netif_carrier_off(dev); /* discard queued packets */
+		if (netif_running(dev))
+			xenvif_down(vif);
+	}
 	rtnl_unlock();
 }

@@ -656,14 +671,13 @@ void xenvif_disconnect(struct xenvif *vif)
 	unsigned int num_queues = vif->num_queues;
 	unsigned int queue_index;

-	if (netif_carrier_ok(vif->dev))
-		xenvif_carrier_off(vif);
+	xenvif_carrier_off(vif);

 	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
 		queue = &vif->queues[queue_index];

 		if (queue->task) {
-			del_timer_sync(&queue->wake_queue);
+			del_timer_sync(&queue->rx_stalled);
 			kthread_stop(queue->task);
 			queue->task = NULL;
 		}
@@ -705,16 +719,12 @@ void xenvif_free(struct xenvif *vif)
 	/* Here we want to avoid timeout messages if an skb can be legitimately
 	 * stuck somewhere else. Realistically this could be an another vif's
 	 * internal or QDisc queue. That another vif also has this
-	 * rx_drain_timeout_msecs timeout, but the timer only ditches the
-	 * internal queue. After that, the QDisc queue can put in worst case
-	 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
-	 * internal queue, so we need several rounds of such timeouts until we
-	 * can be sure that no another vif should have skb's from us. We are
-	 * not sending more skb's, so newly stuck packets are not interesting
-	 * for us here.
+	 * rx_drain_timeout_msecs timeout, so give it time to drain out.
+	 * Although if that other guest wakes up just before its timeout happens
+	 * and takes only one skb from QDisc, it can hold onto other skbs for a
+	 * longer period.
 	 */
-	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
-		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);

 	unregister_netdev(vif->dev);


--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 static inline int rx_work_todo(struct xenvif_queue *queue)
 {
 	return (!skb_queue_empty(&queue->rx_queue) &&
-	       xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
-	       queue->rx_queue_purge;
+	       xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
 }

 static inline int tx_work_todo(struct xenvif_queue *queue)
@@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue)
 		xenvif_wake_queue(queue);
 }

+/* Only called from the queue's thread, it handles the situation when the guest
+ * doesn't post enough requests on the receiving ring.
+ * First xenvif_start_xmit disables QDisc and starts a timer, and then either
+ * the timer fires, or the guest sends an interrupt after posting new requests.
+ * If it is the timer, the carrier is turned off here.
+ * */
+static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+{
+	/* Either the last unsuccessful skb or at least 1 slot should fit */
+	int needed = queue->rx_last_skb_slots ?
+		     queue->rx_last_skb_slots : 1;
+
+	/* It is assumed that if the guest posts new slots after this, the RX
+	 * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
+	 * the thread again
+	 */
+	set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
+	if (!xenvif_rx_ring_slots_available(queue, needed)) {
+		rtnl_lock();
+		if (netif_carrier_ok(queue->vif->dev)) {
+			/* Timer fired and there are still no slots. Turn off
+			 * everything except the interrupts
+			 */
+			netif_carrier_off(queue->vif->dev);
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_last_skb_slots = 0;
+			if (net_ratelimit())
+				netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
+		} else {
+			/* Probably another queue already turned the carrier
+			 * off; make sure nothing is stuck in the internal
+			 * queue of this queue
+			 */
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_last_skb_slots = 0;
+		}
+		rtnl_unlock();
+	} else if (!netif_carrier_ok(queue->vif->dev)) {
+		unsigned int num_queues = queue->vif->num_queues;
+		unsigned int i;
+		/* The carrier was down, but an interrupt kicked
+		 * the thread again after new requests were
+		 * posted
+		 */
+		clear_bit(QUEUE_STATUS_RX_STALLED,
+			  &queue->status);
+		rtnl_lock();
+		netif_carrier_on(queue->vif->dev);
+		netif_tx_wake_all_queues(queue->vif->dev);
+		rtnl_unlock();
+
+		for (i = 0; i < num_queues; i++) {
+			struct xenvif_queue *temp = &queue->vif->queues[i];
+
+			xenvif_napi_schedule_or_enable_events(temp);
+		}
+		if (net_ratelimit())
+			netdev_err(queue->vif->dev, "Carrier on again\n");
+	} else {
+		/* Queuing was stopped, but the guest posted
+		 * new requests and sent an interrupt
+		 */
+		clear_bit(QUEUE_STATUS_RX_STALLED,
+			  &queue->status);
+		del_timer_sync(&queue->rx_stalled);
+		xenvif_start_queue(queue);
+	}
+}
+
 int xenvif_kthread_guest_rx(void *data)
 {
 	struct xenvif_queue *queue = data;
@@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data)
 		wait_event_interruptible(queue->wq,
 					 rx_work_todo(queue) ||
 					 queue->vif->disabled ||
+					 test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
 					 kthread_should_stop());

+		if (kthread_should_stop())
+			break;
+
 		/* This frontend is found to be rogue, disable it in
 		 * kthread context. Currently this is only set when
 		 * netback finds out frontend sends malformed packet,
@@ -1953,26 +2025,23 @@ int xenvif_kthread_guest_rx(void *data)
 		 * context so we defer it here, if this thread is
 		 * associated with queue 0.
 		 */
-		if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
+		if (unlikely(queue->vif->disabled && queue->id == 0))
 			xenvif_carrier_off(queue->vif);
-
-		if (kthread_should_stop())
-			break;
-
-		if (queue->rx_queue_purge) {
+		else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+						     &queue->status))) {
+			xenvif_rx_purge_event(queue);
+		} else if (!netif_carrier_ok(queue->vif->dev)) {
+			/* Another queue stalled and turned the carrier off, so
+			 * purge the internal queue of queues which were not
+			 * blocked
+			 */
 			skb_queue_purge(&queue->rx_queue);
-			queue->rx_queue_purge = false;
+			queue->rx_last_skb_slots = 0;
 		}

 		if (!skb_queue_empty(&queue->rx_queue))
 			xenvif_rx_action(queue);

-		if (skb_queue_empty(&queue->rx_queue) &&
-		    xenvif_queue_stopped(queue)) {
-			del_timer_sync(&queue->wake_queue);
-			xenvif_start_queue(queue);
-		}
-
 		cond_resched();
 	}