提交 ad5da10a 编写于 作者: O Olof Johansson 提交者: David S. Miller

pasemi_mac: further performance tweaks

pasemi_mac: further performance tweaks

Misc driver tweaks for pasemi_mac:
	* Increase ring size (really needed mostly on 10G)
	* Take out an unneeded barrier
	* Move around a few prefetches and reorder a few calls
	* Don't try to clean on full tx buffer, just let things
	  take their course and stop the queue directly
	* Avoid filling on the same line as the interface is
	  working on to reduce cache line bouncing
	* Avoid unneeded clearing of software state (and make the
	  interface shutdown code handle it)
	* Fix up some of the tx ring wrap logic.
Signed-off-by: NOlof Johansson <olof@lixom.net>
Signed-off-by: NJeff Garzik <jeff@garzik.org>
上级 8dc121a4
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -56,8 +56,8 @@
/* Must be a power of two */
#define RX_RING_SIZE 512
#define TX_RING_SIZE 512
#define RX_RING_SIZE 4096
#define TX_RING_SIZE 4096
#define DEFAULT_MSG_ENABLE \
(NETIF_MSG_DRV | \
......@@ -336,8 +336,16 @@ static void pasemi_mac_free_tx_resources(struct net_device *dev)
struct pasemi_mac_buffer *info;
dma_addr_t dmas[MAX_SKB_FRAGS+1];
int freed;
int start, limit;
for (i = 0; i < TX_RING_SIZE; i += freed) {
start = mac->tx->next_to_clean;
limit = mac->tx->next_to_fill;
/* Compensate for when fill has wrapped and clean has not */
if (start > limit)
limit += TX_RING_SIZE;
for (i = start; i < limit; i += freed) {
info = &TX_RING_INFO(mac, i+1);
if (info->dma && info->skb) {
for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
......@@ -520,9 +528,6 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
n = mac->rx->next_to_clean;
for (count = limit; count; count--) {
rmb();
macrx = RX_RING(mac, n);
if ((macrx & XCT_MACRX_E) ||
......@@ -550,14 +555,10 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
break;
}
prefetchw(info);
skb = info->skb;
prefetchw(skb);
info->dma = 0;
pci_unmap_single(mac->dma_pdev, dma, skb->len,
PCI_DMA_FROMDEVICE);
prefetch(skb);
prefetch(&skb->data_len);
len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
......@@ -576,10 +577,9 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
} else
info->skb = NULL;
/* Need to zero it out since hardware doesn't, since the
* replenish loop uses it to tell when it's done.
*/
RX_BUFF(mac, i) = 0;
pci_unmap_single(mac->dma_pdev, dma, len, PCI_DMA_FROMDEVICE);
info->dma = 0;
skb_put(skb, len);
......@@ -599,6 +599,11 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
RX_RING(mac, n) = 0;
RX_RING(mac, n+1) = 0;
/* Need to zero it out since hardware doesn't, since the
* replenish loop uses it to tell when it's done.
*/
RX_BUFF(mac, i) = 0;
n += 2;
}
......@@ -621,27 +626,33 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
{
int i, j;
struct pasemi_mac_buffer *info;
unsigned int start, descr_count, buf_count, limit;
unsigned int start, descr_count, buf_count, batch_limit;
unsigned int ring_limit;
unsigned int total_count;
unsigned long flags;
struct sk_buff *skbs[TX_CLEAN_BATCHSIZE];
dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1];
total_count = 0;
limit = TX_CLEAN_BATCHSIZE;
batch_limit = TX_CLEAN_BATCHSIZE;
restart:
spin_lock_irqsave(&mac->tx->lock, flags);
start = mac->tx->next_to_clean;
ring_limit = mac->tx->next_to_fill;
/* Compensate for when fill has wrapped but clean has not */
if (start > ring_limit)
ring_limit += TX_RING_SIZE;
buf_count = 0;
descr_count = 0;
for (i = start;
descr_count < limit && i < mac->tx->next_to_fill;
descr_count < batch_limit && i < ring_limit;
i += buf_count) {
u64 mactx = TX_RING(mac, i);
struct sk_buff *skb;
if ((mactx & XCT_MACTX_E) ||
(*mac->tx_status & PAS_STATUS_ERROR))
......@@ -651,19 +662,15 @@ static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
/* Not yet transmitted */
break;
info = &TX_RING_INFO(mac, i+1);
skbs[descr_count] = info->skb;
skb = TX_RING_INFO(mac, i+1).skb;
skbs[descr_count] = skb;
buf_count = 2 + skb_shinfo(info->skb)->nr_frags;
for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
buf_count = 2 + skb_shinfo(skb)->nr_frags;
for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++)
dmas[descr_count][j] = TX_RING_INFO(mac, i+1+j).dma;
info->dma = 0;
TX_RING(mac, i) = 0;
TX_RING(mac, i+1) = 0;
TX_RING_INFO(mac, i+1).skb = 0;
TX_RING_INFO(mac, i+1).dma = 0;
/* Since we always fill with an even number of entries, make
* sure we skip any unused one at the end as well.
......@@ -672,7 +679,7 @@ static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
buf_count++;
descr_count++;
}
mac->tx->next_to_clean = i;
mac->tx->next_to_clean = i & (TX_RING_SIZE-1);
spin_unlock_irqrestore(&mac->tx->lock, flags);
netif_wake_queue(mac->netdev);
......@@ -683,7 +690,7 @@ static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
total_count += descr_count;
/* If the batch was full, try to clean more */
if (descr_count == limit)
if (descr_count == batch_limit)
goto restart;
return total_count;
......@@ -1106,19 +1113,14 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
spin_lock_irqsave(&txring->lock, flags);
if (RING_AVAIL(txring) <= nfrags+3) {
spin_unlock_irqrestore(&txring->lock, flags);
pasemi_mac_clean_tx(mac);
pasemi_mac_restart_tx_intr(mac);
spin_lock_irqsave(&txring->lock, flags);
if (RING_AVAIL(txring) <= nfrags+3) {
/* Still no room -- stop the queue and wait for tx
* intr when there's room.
*/
netif_stop_queue(dev);
goto out_err;
}
/* Avoid stepping on the same cache line that the DMA controller
* is currently about to send, so leave at least 8 words available.
* Total free space needed is mactx + fragments + 8
*/
if (RING_AVAIL(txring) < nfrags + 10) {
/* no room -- stop the queue and wait for tx intr */
netif_stop_queue(dev);
goto out_err;
}
TX_RING(mac, txring->next_to_fill) = mactx;
......@@ -1137,8 +1139,8 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
if (nfrags & 1)
nfrags++;
txring->next_to_fill += nfrags + 1;
txring->next_to_fill = (txring->next_to_fill + nfrags + 1) &
(TX_RING_SIZE-1);
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部