提交 042a53a9 编写于 作者: E Eric Dumazet 提交者: David S. Miller

net: skb_shared_info optimization

skb_dma_unmap() is quite expensive for small packets,
because we use two different cache lines from skb_shared_info.

One to access nr_frags, one to access dma_maps[0]

Instead of dma_maps being an array of MAX_SKB_FRAGS + 1 elements,
store the head mapping by itself in a new dma_head field, close to nr_frags,
to reduce cache line misses.

Tested on my dev machine (bnx2 & tg3 adapters), nice speedup !
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 eae3f29c
...@@ -5487,7 +5487,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) ...@@ -5487,7 +5487,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
dev_kfree_skb(skb); dev_kfree_skb(skb);
return -EIO; return -EIO;
} }
map = skb_shinfo(skb)->dma_maps[0]; map = skb_shinfo(skb)->dma_head;
REG_WR(bp, BNX2_HC_COMMAND, REG_WR(bp, BNX2_HC_COMMAND,
bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT); bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
...@@ -6167,7 +6167,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -6167,7 +6167,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
} }
sp = skb_shinfo(skb); sp = skb_shinfo(skb);
mapping = sp->dma_maps[0]; mapping = sp->dma_head;
tx_buf = &txr->tx_buf_ring[ring_prod]; tx_buf = &txr->tx_buf_ring[ring_prod];
tx_buf->skb = skb; tx_buf->skb = skb;
...@@ -6191,7 +6191,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -6191,7 +6191,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
txbd = &txr->tx_desc_ring[ring_prod]; txbd = &txr->tx_desc_ring[ring_prod];
len = frag->size; len = frag->size;
mapping = sp->dma_maps[i + 1]; mapping = sp->dma_maps[i];
txbd->tx_bd_haddr_hi = (u64) mapping >> 32; txbd->tx_bd_haddr_hi = (u64) mapping >> 32;
txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff; txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff;
......
...@@ -2998,7 +2998,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, ...@@ -2998,7 +2998,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
size -= 4; size -= 4;
buffer_info->length = size; buffer_info->length = size;
buffer_info->dma = map[0] + offset; buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
...@@ -3039,7 +3039,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, ...@@ -3039,7 +3039,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
size -= 4; size -= 4;
buffer_info->length = size; buffer_info->length = size;
buffer_info->dma = map[f + 1] + offset; buffer_info->dma = map[f] + offset;
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
......
...@@ -3916,7 +3916,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, ...@@ -3916,7 +3916,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
buffer_info->length = size; buffer_info->length = size;
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
buffer_info->dma = map[0] + offset; buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
count++; count++;
len -= size; len -= size;
...@@ -3947,7 +3947,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, ...@@ -3947,7 +3947,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
buffer_info->length = size; buffer_info->length = size;
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
buffer_info->dma = map[f + 1] + offset; buffer_info->dma = map[f] + offset;
len -= size; len -= size;
offset += size; offset += size;
......
...@@ -3139,8 +3139,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, ...@@ -3139,8 +3139,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
/* set time_stamp *before* dma to help avoid a possible race */ /* set time_stamp *before* dma to help avoid a possible race */
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
buffer_info->dma = map[count]; buffer_info->dma = skb_shinfo(skb)->dma_head;
count++;
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
struct skb_frag_struct *frag; struct skb_frag_struct *frag;
...@@ -3164,7 +3163,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, ...@@ -3164,7 +3163,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[i].skb = skb;
tx_ring->buffer_info[first].next_to_watch = i; tx_ring->buffer_info[first].next_to_watch = i;
return count; return count + 1;
} }
static inline void igb_tx_queue_adv(struct igb_adapter *adapter, static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
......
...@@ -2119,8 +2119,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, ...@@ -2119,8 +2119,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
/* set time_stamp *before* dma to help avoid a possible race */ /* set time_stamp *before* dma to help avoid a possible race */
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i; buffer_info->next_to_watch = i;
buffer_info->dma = map[count]; buffer_info->dma = skb_shinfo(skb)->dma_head;
count++;
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
struct skb_frag_struct *frag; struct skb_frag_struct *frag;
...@@ -2144,7 +2143,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, ...@@ -2144,7 +2143,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[i].skb = skb;
tx_ring->buffer_info[first].next_to_watch = i; tx_ring->buffer_info[first].next_to_watch = i;
return count; return count + 1;
} }
static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter, static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
......
...@@ -1300,7 +1300,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, ...@@ -1300,7 +1300,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
buffer_info->length = size; buffer_info->length = size;
WARN_ON(buffer_info->dma != 0); WARN_ON(buffer_info->dma != 0);
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->dma = map[0] + offset; buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
pci_map_single(adapter->pdev, pci_map_single(adapter->pdev,
skb->data + offset, skb->data + offset,
size, size,
...@@ -1340,7 +1340,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, ...@@ -1340,7 +1340,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
buffer_info->length = size; buffer_info->length = size;
buffer_info->time_stamp = jiffies; buffer_info->time_stamp = jiffies;
buffer_info->dma = map[f + 1] + offset; buffer_info->dma = map[f] + offset;
buffer_info->next_to_watch = 0; buffer_info->next_to_watch = 0;
len -= size; len -= size;
......
...@@ -4837,7 +4837,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter, ...@@ -4837,7 +4837,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
tx_buffer_info->length = size; tx_buffer_info->length = size;
tx_buffer_info->dma = map[0] + offset; tx_buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
tx_buffer_info->time_stamp = jiffies; tx_buffer_info->time_stamp = jiffies;
tx_buffer_info->next_to_watch = i; tx_buffer_info->next_to_watch = i;
...@@ -4869,7 +4869,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter, ...@@ -4869,7 +4869,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
tx_buffer_info->length = size; tx_buffer_info->length = size;
tx_buffer_info->dma = map[f + 1] + offset; tx_buffer_info->dma = map[f] + offset;
tx_buffer_info->time_stamp = jiffies; tx_buffer_info->time_stamp = jiffies;
tx_buffer_info->next_to_watch = i; tx_buffer_info->next_to_watch = i;
......
...@@ -5021,7 +5021,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, ...@@ -5021,7 +5021,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
/* New SKB is guaranteed to be linear. */ /* New SKB is guaranteed to be linear. */
entry = *start; entry = *start;
ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE); ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE);
new_addr = skb_shinfo(new_skb)->dma_maps[0]; new_addr = skb_shinfo(new_skb)->dma_head;
/* Make sure new skb does not cross any 4G boundaries. /* Make sure new skb does not cross any 4G boundaries.
* Drop the packet if it does. * Drop the packet if it does.
...@@ -5155,7 +5155,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -5155,7 +5155,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
sp = skb_shinfo(skb); sp = skb_shinfo(skb);
mapping = sp->dma_maps[0]; mapping = sp->dma_head;
tp->tx_buffers[entry].skb = skb; tp->tx_buffers[entry].skb = skb;
...@@ -5173,7 +5173,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -5173,7 +5173,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
len = frag->size; len = frag->size;
mapping = sp->dma_maps[i + 1]; mapping = sp->dma_maps[i];
tp->tx_buffers[entry].skb = NULL; tp->tx_buffers[entry].skb = NULL;
tg3_set_txd(tp, entry, mapping, len, tg3_set_txd(tp, entry, mapping, len,
...@@ -5331,7 +5331,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) ...@@ -5331,7 +5331,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
sp = skb_shinfo(skb); sp = skb_shinfo(skb);
mapping = sp->dma_maps[0]; mapping = sp->dma_head;
tp->tx_buffers[entry].skb = skb; tp->tx_buffers[entry].skb = skb;
...@@ -5356,7 +5356,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) ...@@ -5356,7 +5356,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
len = frag->size; len = frag->size;
mapping = sp->dma_maps[i + 1]; mapping = sp->dma_maps[i];
tp->tx_buffers[entry].skb = NULL; tp->tx_buffers[entry].skb = NULL;
......
...@@ -189,6 +189,9 @@ struct skb_shared_info { ...@@ -189,6 +189,9 @@ struct skb_shared_info {
atomic_t dataref; atomic_t dataref;
unsigned short nr_frags; unsigned short nr_frags;
unsigned short gso_size; unsigned short gso_size;
#ifdef CONFIG_HAS_DMA
dma_addr_t dma_head;
#endif
/* Warning: this field is not always filled in (UFO)! */ /* Warning: this field is not always filled in (UFO)! */
unsigned short gso_segs; unsigned short gso_segs;
unsigned short gso_type; unsigned short gso_type;
...@@ -198,7 +201,7 @@ struct skb_shared_info { ...@@ -198,7 +201,7 @@ struct skb_shared_info {
struct skb_shared_hwtstamps hwtstamps; struct skb_shared_hwtstamps hwtstamps;
skb_frag_t frags[MAX_SKB_FRAGS]; skb_frag_t frags[MAX_SKB_FRAGS];
#ifdef CONFIG_HAS_DMA #ifdef CONFIG_HAS_DMA
dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; dma_addr_t dma_maps[MAX_SKB_FRAGS];
#endif #endif
/* Intermediate layers must ensure that destructor_arg /* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */ * remains valid until skb destructor */
......
...@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, ...@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
if (dma_mapping_error(dev, map)) if (dma_mapping_error(dev, map))
goto out_err; goto out_err;
sp->dma_maps[0] = map; sp->dma_head = map;
for (i = 0; i < sp->nr_frags; i++) { for (i = 0; i < sp->nr_frags; i++) {
skb_frag_t *fp = &sp->frags[i]; skb_frag_t *fp = &sp->frags[i];
...@@ -28,7 +28,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, ...@@ -28,7 +28,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
fp->size, dir); fp->size, dir);
if (dma_mapping_error(dev, map)) if (dma_mapping_error(dev, map))
goto unwind; goto unwind;
sp->dma_maps[i + 1] = map; sp->dma_maps[i] = map;
} }
return 0; return 0;
...@@ -37,10 +37,10 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, ...@@ -37,10 +37,10 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
while (--i >= 0) { while (--i >= 0) {
skb_frag_t *fp = &sp->frags[i]; skb_frag_t *fp = &sp->frags[i];
dma_unmap_page(dev, sp->dma_maps[i + 1], dma_unmap_page(dev, sp->dma_maps[i],
fp->size, dir); fp->size, dir);
} }
dma_unmap_single(dev, sp->dma_maps[0], dma_unmap_single(dev, sp->dma_head,
skb_headlen(skb), dir); skb_headlen(skb), dir);
out_err: out_err:
return -ENOMEM; return -ENOMEM;
...@@ -53,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb, ...@@ -53,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
struct skb_shared_info *sp = skb_shinfo(skb); struct skb_shared_info *sp = skb_shinfo(skb);
int i; int i;
dma_unmap_single(dev, sp->dma_maps[0], dma_unmap_single(dev, sp->dma_head,
skb_headlen(skb), dir); skb_headlen(skb), dir);
for (i = 0; i < sp->nr_frags; i++) { for (i = 0; i < sp->nr_frags; i++) {
skb_frag_t *fp = &sp->frags[i]; skb_frag_t *fp = &sp->frags[i];
dma_unmap_page(dev, sp->dma_maps[i + 1], dma_unmap_page(dev, sp->dma_maps[i],
fp->size, dir); fp->size, dir);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册