提交 3eb1a40f 编写于 作者: A Alexander Duyck 提交者: Jeff Kirsher

igbvf: Make next_to_watch a pointer and adjust memory barriers to avoid races

This change is meant to address several race issues that become possible
because next_to_watch could possibly be set to a value that shows that the
descriptor is done when it is not.  In order to correct that we instead make
next_to_watch a pointer that is set to NULL during cleanup, and set to the
eop_desc after the descriptor rings have been written.

To enforce proper ordering the next_to_watch pointer is not set until after
a wmb writing the values to the last descriptor in a transmit.  In order to
guarantee that the descriptor is not read until after the eop_desc we use the
read_barrier_depends which is only really necessary on the alpha architecture.
Signed-off-by: NAlexander Duyck <alexander.h.duyck@intel.com>
Acked-by: NGreg Rose <gregory.v.rose@intel.com>
Tested-by: NSibai Li <sibai.li@intel.com>
Signed-off-by: NJeff Kirsher <jeffrey.t.kirsher@intel.com>
上级 e792cd91
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -127,8 +127,8 @@ struct igbvf_buffer {
/* Tx */
struct {
unsigned long time_stamp;
union e1000_adv_tx_desc *next_to_watch;
u16 length;
u16 next_to_watch;
u16 mapped_as_page;
};
/* Rx */
......
......@@ -797,20 +797,31 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
struct sk_buff *skb;
union e1000_adv_tx_desc *tx_desc, *eop_desc;
unsigned int total_bytes = 0, total_packets = 0;
unsigned int i, eop, count = 0;
unsigned int i, count = 0;
bool cleaned = false;
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
buffer_info = &tx_ring->buffer_info[i];
eop_desc = buffer_info->next_to_watch;
do {
/* if next_to_watch is not set then there is no work pending */
if (!eop_desc)
break;
/* prevent any other reads prior to eop_desc */
read_barrier_depends();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
break;
/* clear next_to_watch to prevent false hangs */
buffer_info->next_to_watch = NULL;
while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
(count < tx_ring->count)) {
rmb(); /* read buffer_info after eop_desc status */
for (cleaned = false; !cleaned; count++) {
tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
buffer_info = &tx_ring->buffer_info[i];
cleaned = (i == eop);
cleaned = (tx_desc == eop_desc);
skb = buffer_info->skb;
if (skb) {
......@@ -831,10 +842,12 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
i++;
if (i == tx_ring->count)
i = 0;
buffer_info = &tx_ring->buffer_info[i];
}
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
}
eop_desc = buffer_info->next_to_watch;
} while (count < tx_ring->count);
tx_ring->next_to_clean = i;
......@@ -1961,7 +1974,6 @@ static int igbvf_tso(struct igbvf_adapter *adapter,
context_desc->seqnum_seed = 0;
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
buffer_info->dma = 0;
i++;
if (i == tx_ring->count)
......@@ -2021,7 +2033,6 @@ static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter,
context_desc->mss_l4len_idx = 0;
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
buffer_info->dma = 0;
i++;
if (i == tx_ring->count)
......@@ -2061,8 +2072,7 @@ static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
struct igbvf_ring *tx_ring,
struct sk_buff *skb,
unsigned int first)
struct sk_buff *skb)
{
struct igbvf_buffer *buffer_info;
struct pci_dev *pdev = adapter->pdev;
......@@ -2077,7 +2087,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
buffer_info->length = len;
/* set time_stamp *before* dma to help avoid a possible race */
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
buffer_info->mapped_as_page = false;
buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
DMA_TO_DEVICE);
......@@ -2100,7 +2109,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
buffer_info->length = len;
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
buffer_info->mapped_as_page = true;
buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
DMA_TO_DEVICE);
......@@ -2109,7 +2117,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
}
tx_ring->buffer_info[i].skb = skb;
tx_ring->buffer_info[first].next_to_watch = i;
return ++count;
......@@ -2120,7 +2127,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
buffer_info->dma = 0;
buffer_info->time_stamp = 0;
buffer_info->length = 0;
buffer_info->next_to_watch = 0;
buffer_info->mapped_as_page = false;
if (count)
count--;
......@@ -2139,7 +2145,8 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
struct igbvf_ring *tx_ring,
int tx_flags, int count, u32 paylen,
int tx_flags, int count,
unsigned int first, u32 paylen,
u8 hdr_len)
{
union e1000_adv_tx_desc *tx_desc = NULL;
......@@ -2189,6 +2196,7 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
* such as IA-64). */
wmb();
tx_ring->buffer_info[first].next_to_watch = tx_desc;
tx_ring->next_to_use = i;
writel(i, adapter->hw.hw_addr + tx_ring->tail);
/* we need this if more than one processor can write to our tail
......@@ -2255,11 +2263,11 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
* count reflects descriptors mapped, if 0 then mapping error
* has occurred and we need to rewind the descriptor queue
*/
count = igbvf_tx_map_adv(adapter, tx_ring, skb, first);
count = igbvf_tx_map_adv(adapter, tx_ring, skb);
if (count) {
igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
skb->len, hdr_len);
first, skb->len, hdr_len);
/* Make sure there is space in the ring for the next send. */
igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
} else {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部