ixgbe: Replace standard receive path with a page based receive

This patch replaces the existing Rx hot-path in the ixgbe driver with a new implementation that is based on performing a double buffered receive. The ixgbe driver already had something similar in place for its' packet split path, however in that case we were still receiving the header for the packet into the sk_buff. The big change here is the entire receive path will receive into pages only, and then pull the header out of the page and copy it into the sk_buff data. There are several motivations behind this approach. First, this allows us to avoid several cache misses as we were taking a set of cache misses for allocating the sk_buff and then another set for receiving data into the sk_buff. We are able to avoid these misses on receive now as we allocate the sk_buff when data is available. Second we are able to see a considerable performance gain when an IOMMU is enabled because we are no longer unmapping every buffer on receive. Instead we can delay the unmap until we are unable to use the page, and instead we can simply call sync_single_range on the half of the page that contains new data. Finally we are able to drop a considerable amount of code from the driver as we no longer have to support 2 different receive modes, packet split and one buffer. This allows us to optimize the Rx path further since less branching is required. Signed-off-by: N Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: N Ross Brattain <ross.b.brattain@intel.com> Tested-by: N Stephen Ko <stephen.s.ko@intel.com> Signed-off-by: N Jeff Kirsher <jeffrey.t.kirsher@intel.com>

ixgbe: Replace standard receive path with a page based receive
This patch replaces the existing Rx hot-path in the ixgbe driver with a new implementation that is based on performing a double buffered receive. The ixgbe driver already had something similar in place for its' packet split path, however in that case we were still receiving the header for the packet into the sk_buff. The big change here is the entire receive path will receive into pages only, and then pull the header out of the page and copy it into the sk_buff data. There are several motivations behind this approach. First, this allows us to avoid several cache misses as we were taking a set of cache misses for allocating the sk_buff and then another set for receiving data into the sk_buff. We are able to avoid these misses on receive now as we allocate the sk_buff when data is available. Second we are able to see a considerable performance gain when an IOMMU is enabled because we are no longer unmapping every buffer on receive. Instead we can delay the unmap until we are unable to use the page, and instead we can simply call sync_single_range on the half of the page that contains new data. Finally we are able to drop a considerable amount of code from the driver as we no longer have to support 2 different receive modes, packet split and one buffer. This allows us to optimize the Rx path further since less branching is required. Signed-off-by: N Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: N Ross Brattain <ross.b.brattain@intel.com> Tested-by: N Stephen Ko <stephen.s.ko@intel.com> Signed-off-by: N Jeff Kirsher <jeffrey.t.kirsher@intel.com>
f800326d · Alexander Duyck · Jeff Kirsher · 3f2d1c0f · f800326d · f800326d
3 changed file
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -72,12 +72,6 @@

 /* Supported Rx Buffer Sizes */
 #define IXGBE_RXBUFFER_512   512    /* Used for packet split */
-#define IXGBE_RXBUFFER_2K   2048
-#define IXGBE_RXBUFFER_3K   3072
-#define IXGBE_RXBUFFER_4K   4096
-#define IXGBE_RXBUFFER_7K   7168
-#define IXGBE_RXBUFFER_8K   8192
-#define IXGBE_RXBUFFER_15K  15360
 #define IXGBE_MAX_RXBUFFER  16384  /* largest size for a single descriptor */

 /*
@@ -168,7 +162,6 @@ struct ixgbe_rx_buffer {
 	struct sk_buff *skb;
 	dma_addr_t dma;
 	struct page *page;
-	dma_addr_t page_dma;
 	unsigned int page_offset;
 };

@@ -193,21 +186,15 @@ struct ixgbe_rx_queue_stats {
 	u64 csum_err;
 };

-enum ixbge_ring_state_t {
+enum ixgbe_ring_state_t {
 	__IXGBE_TX_FDIR_INIT_DONE,
 	__IXGBE_TX_DETECT_HANG,
 	__IXGBE_HANG_CHECK_ARMED,
-	__IXGBE_RX_PS_ENABLED,
 	__IXGBE_RX_RSC_ENABLED,
 	__IXGBE_RX_CSUM_UDP_ZERO_ERR,
+	__IXGBE_RX_FCOE_BUFSZ,
 };

-#define ring_is_ps_enabled(ring) \
-	test_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state)
-#define set_ring_ps_enabled(ring) \
-	set_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state)
-#define clear_ring_ps_enabled(ring) \
-	clear_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state)
 #define check_for_tx_hang(ring) \
 	test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
 #define set_check_for_tx_hang(ring) \
@@ -233,7 +220,6 @@ struct ixgbe_ring {
 	u8 __iomem *tail;

 	u16 count;			/* amount of descriptors */
-	u16 rx_buf_len;

 	u8 queue_index; /* needed for multiqueue queue management */
 	u8 reg_idx;			/* holds the special value that gets
@@ -241,8 +227,13 @@ struct ixgbe_ring {
 					 * associated with this ring, which is
 					 * different for DCB and RSS modes
 					 */
-	u8 atr_sample_rate;
-	u8 atr_count;
+	union {
+		struct {
+			u8 atr_sample_rate;
+			u8 atr_count;
+		};
+		u16 next_to_alloc;
+	};

 	u16 next_to_use;
 	u16 next_to_clean;
@@ -287,6 +278,22 @@ struct ixgbe_ring_feature {
 	int mask;
 } ____cacheline_internodealigned_in_smp;

+/*
+ * FCoE requires that all Rx buffers be over 2200 bytes in length.  Since
+ * this is twice the size of a half page we need to double the page order
+ * for FCoE enabled Rx queues.
+ */
+#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192)
+static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+{
+	return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0;
+}
+#else
+#define ixgbe_rx_pg_order(_ring) 0
+#endif
+#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
+#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring))
+
 struct ixgbe_ring_container {
 	struct ixgbe_ring *ring;	/* pointer to linked list of rings */
 	unsigned int total_bytes;	/* total bytes processed this int */
@@ -554,7 +561,7 @@ struct ixgbe_cb {
 	};
 	dma_addr_t dma;
 	u16 append_cnt;
-	bool delay_unmap;
+	bool page_released;
 };
 #define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb)


--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -35,6 +35,7 @@
 #include <linux/netdevice.h>
 #include <linux/ethtool.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
 #include <linux/uaccess.h>

 #include "ixgbe.h"
@@ -1615,7 +1616,6 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
 	rx_ring->dev = &adapter->pdev->dev;
 	rx_ring->netdev = adapter->netdev;
 	rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
-	rx_ring->rx_buf_len = IXGBE_RXBUFFER_2K;

 	err = ixgbe_setup_rx_resources(rx_ring);
 	if (err) {
@@ -1718,13 +1718,15 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer,

 	frame_size >>= 1;

-	data = rx_buffer->skb->data;
+	data = kmap(rx_buffer->page) + rx_buffer->page_offset;

 	if (data[3] != 0xFF ||
 	    data[frame_size + 10] != 0xBE ||
 	    data[frame_size + 12] != 0xAF)
 		match = false;

+	kunmap(rx_buffer->page);
+
 	return match;
 }

@@ -1746,17 +1748,22 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
 		/* check Rx buffer */
 		rx_buffer = &rx_ring->rx_buffer_info[rx_ntc];

-		/* unmap Rx buffer, will be remapped by alloc_rx_buffers */
-		dma_unmap_single(rx_ring->dev,
-				 rx_buffer->dma,
-				 rx_ring->rx_buf_len,
-				 DMA_FROM_DEVICE);
-		rx_buffer->dma = 0;
+		/* sync Rx buffer for CPU read */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					rx_buffer->dma,
+					ixgbe_rx_bufsz(rx_ring),
+					DMA_FROM_DEVICE);

 		/* verify contents of skb */
 		if (ixgbe_check_lbtest_frame(rx_buffer, size))
 			count++;

+		/* sync Rx buffer for device write */
+		dma_sync_single_for_device(rx_ring->dev,
+					   rx_buffer->dma,
+					   ixgbe_rx_bufsz(rx_ring),
+					   DMA_FROM_DEVICE);
+
 		/* unmap buffer on Tx side */
 		tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
 		ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer);

--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c