Merge branch 'vhost_net-Avoid-vq-kicks-during-busyloop'

Toshiaki Makita says: ==================== vhost_net: Avoid vq kicks during busyloop Under heavy load vhost tx busypoll tend not to suppress vq kicks, which causes poor guest tx performance. The detailed scenario is described in commitlog of patch 2. Rx seems not to have that serious problem, but for consistency I made a similar change on rx to avoid rx wakeups (patch 3). Additionary patch 4 is to avoid rx kicks under heavy load during busypoll. Tx performance is greatly improved by this change. I don't see notable performance change on rx with this series though. Performance numbers (tx): - Bulk transfer from guest to external physical server. [Guest]->vhost_net->tap--(XDP_REDIRECT)-->i40e --(wire)--> [Server] - Set 10us busypoll. - Guest disables checksum and TSO because of host XDP. - Measured single flow Mbps by netperf, and kicks by perf kvm stat (EPT_MISCONFIG event). Before After Mbps kicks/s Mbps kicks/s UDP_STREAM 1472byte 247758 27 Send 3645.37 6958.10 Recv 3588.56 6958.10 1byte 9865 37 Send 4.34 5.43 Recv 4.17 5.26 TCP_STREAM 8801.03 45794 9592.77 2884 v2: - Split patches into 3 parts (renaming variables, tx-kick fix, rx-wakeup fix). - Avoid rx-kicks too (patch 4). - Don't memorize endtime as it is not needed for now. ==================== Acked-by: N Michael S. Tsirkin <mst@redhat.com> Signed-off-by: N David S. Miller <davem@davemloft.net>

Merge branch 'vhost_net-Avoid-vq-kicks-during-busyloop'
Toshiaki Makita says: ==================== vhost_net: Avoid vq kicks during busyloop Under heavy load vhost tx busypoll tend not to suppress vq kicks, which causes poor guest tx performance. The detailed scenario is described in commitlog of patch 2. Rx seems not to have that serious problem, but for consistency I made a similar change on rx to avoid rx wakeups (patch 3). Additionary patch 4 is to avoid rx kicks under heavy load during busypoll. Tx performance is greatly improved by this change. I don't see notable performance change on rx with this series though. Performance numbers (tx): - Bulk transfer from guest to external physical server. [Guest]->vhost_net->tap--(XDP_REDIRECT)-->i40e --(wire)--> [Server] - Set 10us busypoll. - Guest disables checksum and TSO because of host XDP. - Measured single flow Mbps by netperf, and kicks by perf kvm stat (EPT_MISCONFIG event). Before After Mbps kicks/s Mbps kicks/s UDP_STREAM 1472byte 247758 27 Send 3645.37 6958.10 Recv 3588.56 6958.10 1byte 9865 37 Send 4.34 5.43 Recv 4.17 5.26 TCP_STREAM 8801.03 45794 9592.77 2884 v2: - Split patches into 3 parts (renaming variables, tx-kick fix, rx-wakeup fix). - Avoid rx-kicks too (patch 4). - Don't memorize endtime as it is not needed for now. ==================== Acked-by: N Michael S. Tsirkin <mst@redhat.com> Signed-off-by: N David S. Miller <davem@davemloft.net>
f744c4bb · David S. Miller · e7e3728b · 6369fec5 · f744c4bb
隐藏空白更改
内联并排

Showing with 60 addition and 35 deletion

drivers/vhost/net.c drivers/vhost/net.c +60 -35

未找到文件。
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -396,13 +396,10 @@ static inline unsigned long busy_clock(void)
 	return local_clock() >> 10;
 }

-static bool vhost_can_busy_poll(struct vhost_dev *dev,
-				unsigned long endtime)
+static bool vhost_can_busy_poll(unsigned long endtime)
 {
-	return likely(!need_resched()) &&
-	       likely(!time_after(busy_clock(), endtime)) &&
-	       likely(!signal_pending(current)) &&
-	       !vhost_has_work(dev);
+	return likely(!need_resched() && !time_after(busy_clock(), endtime) &&
+		      !signal_pending(current));
 }

 static void vhost_net_disable_vq(struct vhost_net *n,
@@ -434,7 +431,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
 				    struct iovec iov[], unsigned int iov_size,
-				    unsigned int *out_num, unsigned int *in_num)
+				    unsigned int *out_num, unsigned int *in_num,
+				    bool *busyloop_intr)
 {
 	unsigned long uninitialized_var(endtime);
 	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
@@ -443,9 +441,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 	if (r == vq->num && vq->busyloop_timeout) {
 		preempt_disable();
 		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
+		while (vhost_can_busy_poll(endtime)) {
+			if (vhost_has_work(vq->dev)) {
+				*busyloop_intr = true;
+				break;
+			}
+			if (!vhost_vq_avail_empty(vq->dev, vq))
+				break;
 			cpu_relax();
+		}
 		preempt_enable();
 		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				      out_num, in_num, NULL, NULL);
@@ -501,20 +505,24 @@ static void handle_tx(struct vhost_net *net)
 	zcopy = nvq->ubufs;

 	for (;;) {
+		bool busyloop_intr;
+
 		/* Release DMAs done buffers first */
 		if (zcopy)
 			vhost_zerocopy_signal_used(net, vq);

-
+		busyloop_intr = false;
 		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
 						ARRAY_SIZE(vq->iov),
-						&out, &in);
+						&out, &in, &busyloop_intr);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(head < 0))
 			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+			if (unlikely(busyloop_intr)) {
+				vhost_poll_queue(&vq->poll);
+			} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
 			}
@@ -645,41 +653,50 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 	nvq->done_idx = 0;
 }

-static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
+static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
+				      bool *busyloop_intr)
 {
-	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
-	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
-	struct vhost_virtqueue *vq = &nvq->vq;
+	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
+	struct vhost_virtqueue *rvq = &rnvq->vq;
+	struct vhost_virtqueue *tvq = &tnvq->vq;
 	unsigned long uninitialized_var(endtime);
-	int len = peek_head_len(rvq, sk);
+	int len = peek_head_len(rnvq, sk);

-	if (!len && vq->busyloop_timeout) {
+	if (!len && tvq->busyloop_timeout) {
 		/* Flush batched heads first */
-		vhost_rx_signal_used(rvq);
+		vhost_rx_signal_used(rnvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, 1);
-		vhost_disable_notify(&net->dev, vq);
+		mutex_lock_nested(&tvq->mutex, 1);
+		vhost_disable_notify(&net->dev, tvq);

 		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
+		endtime = busy_clock() + tvq->busyloop_timeout;

-		while (vhost_can_busy_poll(&net->dev, endtime) &&
-		       !sk_has_rx_data(sk) &&
-		       vhost_vq_avail_empty(&net->dev, vq))
+		while (vhost_can_busy_poll(endtime)) {
+			if (vhost_has_work(&net->dev)) {
+				*busyloop_intr = true;
+				break;
+			}
+			if ((sk_has_rx_data(sk) &&
+			     !vhost_vq_avail_empty(&net->dev, rvq)) ||
+			    !vhost_vq_avail_empty(&net->dev, tvq))
+				break;
 			cpu_relax();
+		}

 		preempt_enable();

-		if (!vhost_vq_avail_empty(&net->dev, vq))
-			vhost_poll_queue(&vq->poll);
-		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-			vhost_disable_notify(&net->dev, vq);
-			vhost_poll_queue(&vq->poll);
+		if (!vhost_vq_avail_empty(&net->dev, tvq)) {
+			vhost_poll_queue(&tvq->poll);
+		} else if (unlikely(vhost_enable_notify(&net->dev, tvq))) {
+			vhost_disable_notify(&net->dev, tvq);
+			vhost_poll_queue(&tvq->poll);
 		}

-		mutex_unlock(&vq->mutex);
+		mutex_unlock(&tvq->mutex);

-		len = peek_head_len(rvq, sk);
+		len = peek_head_len(rnvq, sk);
 	}

 	return len;
@@ -786,6 +803,7 @@ static void handle_rx(struct vhost_net *net)
 	s16 headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
+	bool busyloop_intr = false;
 	struct socket *sock;
 	struct iov_iter fixup;
 	__virtio16 num_buffers;
@@ -809,7 +827,8 @@ static void handle_rx(struct vhost_net *net)
 		vq->log : NULL;
 	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);

-	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
+	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+						      &busyloop_intr))) {
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@@ -820,7 +839,9 @@ static void handle_rx(struct vhost_net *net)
 			goto out;
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
-			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+			if (unlikely(busyloop_intr)) {
+				vhost_poll_queue(&vq->poll);
+			} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
 				vhost_disable_notify(&net->dev, vq);
@@ -830,6 +851,7 @@ static void handle_rx(struct vhost_net *net)
 			 * they refilled. */
 			goto out;
 		}
+		busyloop_intr = false;
 		if (nvq->rx_ring)
 			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
 		/* On overrun, truncate and discard */
@@ -896,7 +918,10 @@ static void handle_rx(struct vhost_net *net)
 			goto out;
 		}
 	}
-	vhost_net_enable_vq(net, vq);
+	if (unlikely(busyloop_intr))
+		vhost_poll_queue(&vq->poll);
+	else
+		vhost_net_enable_vq(net, vq);
 out:
 	vhost_rx_signal_used(nvq);
 	mutex_unlock(&vq->mutex);