提交 ca9ec088 编写于 作者: A Alexander Duyck 提交者: Jeff Kirsher

i40e/i40evf: Add support for padding start of frames

This patch adds padding to the start of frames to make room for headroom
for us to eventually start using build_skb.  Right now we guarantee at
least NET_SKB_PAD + NET_IP_ALIGN, however we allocate more space if more is
available.  For example on x86 the headroom should be 192 bytes.

On systems that have too large of a cache line size to support storing 1.5K
padding and shared info we default to using 3K buffers and reserve
everything that isn't used for skb_shared_info or the data buffer for
headroom.

Change-ID: I33c641c9a1ea10cf7cc484c2d20985368d2d709a
Signed-off-by: NAlexander Duyck <alexander.h.duyck@intel.com>
Tested-by: NAndrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: NJeff Kirsher <jeffrey.t.kirsher@intel.com>
上级 98efd694
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -3038,6 +3038,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
return -ENOMEM;
}
/* configure Rx buffer alignment */
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
clear_ring_build_skb_enabled(ring);
else
set_ring_build_skb_enabled(ring);
/* cache tail for quicker writes, and clear the reg before use */
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
......@@ -3079,7 +3085,8 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
vsi->max_frame = I40E_MAX_RXBUFFER;
vsi->rx_buf_len = I40E_RXBUFFER_2048;
#if (PAGE_SIZE < 8192)
} else if (vsi->netdev->mtu <= ETH_DATA_LEN) {
} else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
#endif
......
......@@ -1247,6 +1247,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -1291,7 +1302,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -1696,7 +1707,7 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......
......@@ -135,6 +135,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -341,7 +393,8 @@ struct i40e_ring {
u8 packet_stride;
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -369,6 +422,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......
......@@ -618,6 +618,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -662,7 +673,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -1057,7 +1068,7 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......
......@@ -122,6 +122,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -328,7 +380,8 @@ struct i40e_ring {
u8 packet_stride;
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -356,6 +409,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......
......@@ -704,7 +704,8 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
* standard Ethernet mtu. On x86 this gives us enough room
* for shared info and 192 bytes of padding.
*/
if (netdev->mtu <= ETH_DATA_LEN)
if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
(netdev->mtu <= ETH_DATA_LEN))
rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
}
#endif
......@@ -712,6 +713,11 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
for (i = 0; i < adapter->num_active_queues; i++) {
adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
adapter->rx_rings[i].rx_buf_len = rx_buf_len;
if (adapter->flags & I40EVF_FLAG_LEGACY_RX)
clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
else
set_ring_build_skb_enabled(&adapter->rx_rings[i]);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部