提交 061c1a6e 编写于 作者: D David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net

Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-02-26

This series contains fixes for i40e and i40evf only.

Alexey Khoroshilov found a possible leak of 'cmd_buf' when copy_from_user()
failed in i40e_dbg_command_write(), so resolved by calling kfree().

Shannon provides a fix to ensure the shift and bitwise precedences do not
work backwards for us by adding parens.  Fixed the driver to prevent
stray interrupts and system log messages about un-handled interrupts by
combining the ICR0 shutdown with the standard interrupt shutdown and
adding the interrupt clearing to the PCI shutdown path.  Fixed an issue
where an NVM write times out before a transaction can complete: Shannon
added logic to make another attempt by reacquiring the semaphore and
retrying the write; if that one retry fails, we then give up.  Adds checks
to pointers before their use to ensure we do not try to dereference NULL
pointers when returning values from the AdminQ calls.

Akeem adds a check to bail out if the device is already down when checking
for Tx hang subtask.

Anjali fixes TSO with more than 8 frags per segment issue.  The hardware
has some limitations which the driver needs to adhere to:
  1) no more than 8 descriptors per packet on the wire
  2) no header can span more than 3 descriptors
If one of these events happens, the hardware will generate an internal
error and freeze the Tx queue, so Anjali fixes this by linearizing the skb
to avoid these situations.  Fixed an issue where the per Traffic Class
queue count was higher than queues enabled, which will fix a warning
with multiple function mode where systems regularly have more cores than
vectors.  Fixed TCP/IPv6 over VXLAN Tx checksum offload, where we were
checking the outer protocol flags and deciding the flow for the inner
header.

Jesse fixes a race condition in the transmit hang detection.  Before, we
were having issues with false Tx hang detection; now the driver is more
direct in its checks for forward progress by directly checking the head
write back address and tail register when determining progress.  This
avoids Tx hangs where the software gets behind, because we are directly
checking hardware state when determining a hang state.

Neerav fixes the transmit ring Qset handle when DCB reconfigures. The issue
was that when DCB was reconfigured to a single traffic class (TC), the driver
did not reset the Tx ring Qset handle to correct the mapping, which caused
Tx queue disable timeouts.  Also as part of DCB reconfiguration flow
if the Tx queue disable times out, then issue a PF reset to do some level
of recovery.

Mitch stops flow director on shutdown because, in some cases, the hardware
would continue to try to access the FDIR ring after entering D3Hot state,
which would cause either PCIe errors or NMIs, depending upon the system
configuration.

* NOTE * I have verified that this series of patches for net will not cause
any merge issues when you sync up your net tree with your net-next tree.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -868,8 +868,9 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) ...@@ -868,8 +868,9 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
* The grst delay value is in 100ms units, and we'll wait a * The grst delay value is in 100ms units, and we'll wait a
* couple counts longer to be sure we don't just miss the end. * couple counts longer to be sure we don't just miss the end.
*/ */
grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) &
>> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >>
I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
for (cnt = 0; cnt < grst_del + 2; cnt++) { for (cnt = 0; cnt < grst_del + 2; cnt++) {
reg = rd32(hw, I40E_GLGEN_RSTAT); reg = rd32(hw, I40E_GLGEN_RSTAT);
if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
...@@ -2846,7 +2847,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw, ...@@ -2846,7 +2847,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
if (!status) if (!status && filter_index)
*filter_index = resp->index; *filter_index = resp->index;
return status; return status;
......
...@@ -40,7 +40,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay) ...@@ -40,7 +40,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay)
u32 val; u32 val;
val = rd32(hw, I40E_PRTDCB_GENC); val = rd32(hw, I40E_PRTDCB_GENC);
*delay = (u16)(val & I40E_PRTDCB_GENC_PFCLDA_MASK >> *delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >>
I40E_PRTDCB_GENC_PFCLDA_SHIFT); I40E_PRTDCB_GENC_PFCLDA_SHIFT);
} }
......
...@@ -989,8 +989,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, ...@@ -989,8 +989,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
if (!cmd_buf) if (!cmd_buf)
return count; return count;
bytes_not_copied = copy_from_user(cmd_buf, buffer, count); bytes_not_copied = copy_from_user(cmd_buf, buffer, count);
if (bytes_not_copied < 0) if (bytes_not_copied < 0) {
kfree(cmd_buf);
return bytes_not_copied; return bytes_not_copied;
}
if (bytes_not_copied > 0) if (bytes_not_copied > 0)
count -= bytes_not_copied; count -= bytes_not_copied;
cmd_buf[count] = '\0'; cmd_buf[count] = '\0';
......
...@@ -1512,7 +1512,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, ...@@ -1512,7 +1512,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
vsi->tc_config.numtc = numtc; vsi->tc_config.numtc = numtc;
vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
/* Number of queues per enabled TC */ /* Number of queues per enabled TC */
num_tc_qps = vsi->alloc_queue_pairs/numtc; /* In MFP case we can have a much lower count of MSIx
* vectors available and so we need to lower the used
* q count.
*/
qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix);
num_tc_qps = qcount / numtc;
num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC); num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC);
/* Setup queue offset/count for all TCs for given VSI */ /* Setup queue offset/count for all TCs for given VSI */
...@@ -2684,8 +2689,15 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) ...@@ -2684,8 +2689,15 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
u16 qoffset, qcount; u16 qoffset, qcount;
int i, n; int i, n;
if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
return; /* Reset the TC information */
for (i = 0; i < vsi->num_queue_pairs; i++) {
rx_ring = vsi->rx_rings[i];
tx_ring = vsi->tx_rings[i];
rx_ring->dcb_tc = 0;
tx_ring->dcb_tc = 0;
}
}
for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
if (!(vsi->tc_config.enabled_tc & (1 << n))) if (!(vsi->tc_config.enabled_tc & (1 << n)))
...@@ -3830,6 +3842,12 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) ...@@ -3830,6 +3842,12 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
{ {
int i; int i;
i40e_stop_misc_vector(pf);
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
synchronize_irq(pf->msix_entries[0].vector);
free_irq(pf->msix_entries[0].vector, pf);
}
i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
for (i = 0; i < pf->num_alloc_vsi; i++) for (i = 0; i < pf->num_alloc_vsi; i++)
if (pf->vsi[i]) if (pf->vsi[i])
...@@ -5254,8 +5272,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, ...@@ -5254,8 +5272,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
/* Wait for the PF's Tx queues to be disabled */ /* Wait for the PF's Tx queues to be disabled */
ret = i40e_pf_wait_txq_disabled(pf); ret = i40e_pf_wait_txq_disabled(pf);
if (!ret) if (ret) {
/* Schedule PF reset to recover */
set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
i40e_service_event_schedule(pf);
} else {
i40e_pf_unquiesce_all_vsi(pf); i40e_pf_unquiesce_all_vsi(pf);
}
exit: exit:
return ret; return ret;
} }
...@@ -5587,7 +5611,8 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf) ...@@ -5587,7 +5611,8 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf)
int i, v; int i, v;
/* If we're down or resetting, just bail */ /* If we're down or resetting, just bail */
if (test_bit(__I40E_CONFIG_BUSY, &pf->state)) if (test_bit(__I40E_DOWN, &pf->state) ||
test_bit(__I40E_CONFIG_BUSY, &pf->state))
return; return;
/* for each VSI/netdev /* for each VSI/netdev
...@@ -9533,6 +9558,7 @@ static void i40e_remove(struct pci_dev *pdev) ...@@ -9533,6 +9558,7 @@ static void i40e_remove(struct pci_dev *pdev)
set_bit(__I40E_DOWN, &pf->state); set_bit(__I40E_DOWN, &pf->state);
del_timer_sync(&pf->service_timer); del_timer_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task); cancel_work_sync(&pf->service_task);
i40e_fdir_teardown(pf);
if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
i40e_free_vfs(pf); i40e_free_vfs(pf);
...@@ -9559,12 +9585,6 @@ static void i40e_remove(struct pci_dev *pdev) ...@@ -9559,12 +9585,6 @@ static void i40e_remove(struct pci_dev *pdev)
if (pf->vsi[pf->lan_vsi]) if (pf->vsi[pf->lan_vsi])
i40e_vsi_release(pf->vsi[pf->lan_vsi]); i40e_vsi_release(pf->vsi[pf->lan_vsi]);
i40e_stop_misc_vector(pf);
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
synchronize_irq(pf->msix_entries[0].vector);
free_irq(pf->msix_entries[0].vector, pf);
}
/* shutdown and destroy the HMC */ /* shutdown and destroy the HMC */
if (pf->hw.hmc.hmc_obj) { if (pf->hw.hmc.hmc_obj) {
ret_code = i40e_shutdown_lan_hmc(&pf->hw); ret_code = i40e_shutdown_lan_hmc(&pf->hw);
...@@ -9718,6 +9738,8 @@ static void i40e_shutdown(struct pci_dev *pdev) ...@@ -9718,6 +9738,8 @@ static void i40e_shutdown(struct pci_dev *pdev)
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
i40e_clear_interrupt_scheme(pf);
if (system_state == SYSTEM_POWER_OFF) { if (system_state == SYSTEM_POWER_OFF) {
pci_wake_from_d3(pdev, pf->wol_en); pci_wake_from_d3(pdev, pf->wol_en);
pci_set_power_state(pdev, PCI_D3hot); pci_set_power_state(pdev, PCI_D3hot);
......
...@@ -679,9 +679,11 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, ...@@ -679,9 +679,11 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
{ {
i40e_status status; i40e_status status;
enum i40e_nvmupd_cmd upd_cmd; enum i40e_nvmupd_cmd upd_cmd;
bool retry_attempt = false;
upd_cmd = i40e_nvmupd_validate_command(hw, cmd, errno); upd_cmd = i40e_nvmupd_validate_command(hw, cmd, errno);
retry:
switch (upd_cmd) { switch (upd_cmd) {
case I40E_NVMUPD_WRITE_CON: case I40E_NVMUPD_WRITE_CON:
status = i40e_nvmupd_nvm_write(hw, cmd, bytes, errno); status = i40e_nvmupd_nvm_write(hw, cmd, bytes, errno);
...@@ -725,6 +727,39 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, ...@@ -725,6 +727,39 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
*errno = -ESRCH; *errno = -ESRCH;
break; break;
} }
/* In some circumstances, a multi-write transaction takes longer
* than the default 3 minute timeout on the write semaphore. If
* the write failed with an EBUSY status, this is likely the problem,
* so here we try to reacquire the semaphore then retry the write.
* We only do one retry, then give up.
*/
if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) &&
!retry_attempt) {
i40e_status old_status = status;
u32 old_asq_status = hw->aq.asq_last_status;
u32 gtime;
gtime = rd32(hw, I40E_GLVFGEN_TIMER);
if (gtime >= hw->nvm.hw_semaphore_timeout) {
i40e_debug(hw, I40E_DEBUG_ALL,
"NVMUPD: write semaphore expired (%d >= %lld), retrying\n",
gtime, hw->nvm.hw_semaphore_timeout);
i40e_release_nvm(hw);
status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
if (status) {
i40e_debug(hw, I40E_DEBUG_ALL,
"NVMUPD: write semaphore reacquire failed aq_err = %d\n",
hw->aq.asq_last_status);
status = old_status;
hw->aq.asq_last_status = old_asq_status;
} else {
retry_attempt = true;
goto retry;
}
}
}
return status; return status;
} }
......
...@@ -585,6 +585,20 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) ...@@ -585,6 +585,20 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
} }
} }
/**
* i40e_get_head - Retrieve head from head writeback
* @tx_ring: tx ring to fetch head of
*
* Returns value of Tx ring head based on value stored
* in head write-back location
**/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
return le32_to_cpu(*(volatile __le32 *)head);
}
/** /**
* i40e_get_tx_pending - how many tx descriptors not processed * i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors * @tx_ring: the ring of descriptors
...@@ -594,10 +608,16 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) ...@@ -594,10 +608,16 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
**/ **/
static u32 i40e_get_tx_pending(struct i40e_ring *ring) static u32 i40e_get_tx_pending(struct i40e_ring *ring)
{ {
u32 ntu = ((ring->next_to_clean <= ring->next_to_use) u32 head, tail;
? ring->next_to_use
: ring->next_to_use + ring->count); head = i40e_get_head(ring);
return ntu - ring->next_to_clean; tail = readl(ring->tail);
if (head != tail)
return (head < tail) ?
tail - head : (tail + ring->count - head);
return 0;
} }
/** /**
...@@ -606,6 +626,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) ...@@ -606,6 +626,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring)
**/ **/
static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
{ {
u32 tx_done = tx_ring->stats.packets;
u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
u32 tx_pending = i40e_get_tx_pending(tx_ring); u32 tx_pending = i40e_get_tx_pending(tx_ring);
struct i40e_pf *pf = tx_ring->vsi->back; struct i40e_pf *pf = tx_ring->vsi->back;
bool ret = false; bool ret = false;
...@@ -623,41 +645,25 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) ...@@ -623,41 +645,25 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
* run the check_tx_hang logic with a transmit completion * run the check_tx_hang logic with a transmit completion
* pending but without time to complete it yet. * pending but without time to complete it yet.
*/ */
if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && if ((tx_done_old == tx_done) && tx_pending) {
(tx_pending >= I40E_MIN_DESC_PENDING)) {
/* make sure it is true for two checks in a row */ /* make sure it is true for two checks in a row */
ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
&tx_ring->state); &tx_ring->state);
} else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && } else if (tx_done_old == tx_done &&
(tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
(tx_pending > 0)) {
if (I40E_DEBUG_FLOW & pf->hw.debug_mask) if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
tx_pending, tx_ring->queue_index); tx_pending, tx_ring->queue_index);
pf->tx_sluggish_count++; pf->tx_sluggish_count++;
} else { } else {
/* update completed stats and disarm the hang check */ /* update completed stats and disarm the hang check */
tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; tx_ring->tx_stats.tx_done_old = tx_done;
clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
} }
return ret; return ret;
} }
/**
* i40e_get_head - Retrieve head from head writeback
* @tx_ring: tx ring to fetch head of
*
* Returns value of Tx ring head based on value stored
* in head write-back location
**/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
return le32_to_cpu(*(volatile __le32 *)head);
}
#define WB_STRIDE 0x3 #define WB_STRIDE 0x3
/** /**
...@@ -2139,6 +2145,67 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) ...@@ -2139,6 +2145,67 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
return __i40e_maybe_stop_tx(tx_ring, size); return __i40e_maybe_stop_tx(tx_ring, size);
} }
/**
* i40e_chk_linearize - Check if there are more than 8 fragments per packet
* @skb: send buffer
* @tx_flags: collected send information
* @hdr_len: size of the packet header
*
* Note: Our HW can't scatter-gather more than 8 fragments to build
* a packet on the wire and so we need to figure out the cases where we
* need to linearize the skb.
**/
static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
const u8 hdr_len)
{
struct skb_frag_struct *frag;
bool linearize = false;
unsigned int size = 0;
u16 num_frags;
u16 gso_segs;
num_frags = skb_shinfo(skb)->nr_frags;
gso_segs = skb_shinfo(skb)->gso_segs;
if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
u16 j = 1;
if (num_frags < (I40E_MAX_BUFFER_TXD))
goto linearize_chk_done;
/* try the simple math, if we have too many frags per segment */
if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
I40E_MAX_BUFFER_TXD) {
linearize = true;
goto linearize_chk_done;
}
frag = &skb_shinfo(skb)->frags[0];
size = hdr_len;
/* we might still have more fragments per segment */
do {
size += skb_frag_size(frag);
frag++; j++;
if (j == I40E_MAX_BUFFER_TXD) {
if (size < skb_shinfo(skb)->gso_size) {
linearize = true;
break;
}
j = 1;
size -= skb_shinfo(skb)->gso_size;
if (size)
j++;
size += hdr_len;
}
num_frags--;
} while (num_frags);
} else {
if (num_frags >= I40E_MAX_BUFFER_TXD)
linearize = true;
}
linearize_chk_done:
return linearize;
}
/** /**
* i40e_tx_map - Build the Tx descriptor * i40e_tx_map - Build the Tx descriptor
* @tx_ring: ring to send buffer on * @tx_ring: ring to send buffer on
...@@ -2396,6 +2463,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, ...@@ -2396,6 +2463,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
if (tsyn) if (tsyn)
tx_flags |= I40E_TX_FLAGS_TSYN; tx_flags |= I40E_TX_FLAGS_TSYN;
if (i40e_chk_linearize(skb, tx_flags, hdr_len))
if (skb_linearize(skb))
goto out_drop;
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
/* always enable CRC insertion offload */ /* always enable CRC insertion offload */
......
...@@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { ...@@ -112,6 +112,7 @@ enum i40e_dyn_idx_t {
#define i40e_rx_desc i40e_32byte_rx_desc #define i40e_rx_desc i40e_32byte_rx_desc
#define I40E_MAX_BUFFER_TXD 8
#define I40E_MIN_TX_LEN 17 #define I40E_MIN_TX_LEN 17
#define I40E_MAX_DATA_PER_TXD 8192 #define I40E_MAX_DATA_PER_TXD 8192
......
...@@ -125,6 +125,20 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) ...@@ -125,6 +125,20 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
} }
} }
/**
* i40e_get_head - Retrieve head from head writeback
* @tx_ring: tx ring to fetch head of
*
* Returns value of Tx ring head based on value stored
* in head write-back location
**/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
return le32_to_cpu(*(volatile __le32 *)head);
}
/** /**
* i40e_get_tx_pending - how many tx descriptors not processed * i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors * @tx_ring: the ring of descriptors
...@@ -134,10 +148,16 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) ...@@ -134,10 +148,16 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
**/ **/
static u32 i40e_get_tx_pending(struct i40e_ring *ring) static u32 i40e_get_tx_pending(struct i40e_ring *ring)
{ {
u32 ntu = ((ring->next_to_clean <= ring->next_to_use) u32 head, tail;
? ring->next_to_use
: ring->next_to_use + ring->count); head = i40e_get_head(ring);
return ntu - ring->next_to_clean; tail = readl(ring->tail);
if (head != tail)
return (head < tail) ?
tail - head : (tail + ring->count - head);
return 0;
} }
/** /**
...@@ -146,6 +166,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) ...@@ -146,6 +166,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring)
**/ **/
static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
{ {
u32 tx_done = tx_ring->stats.packets;
u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
u32 tx_pending = i40e_get_tx_pending(tx_ring); u32 tx_pending = i40e_get_tx_pending(tx_ring);
bool ret = false; bool ret = false;
...@@ -162,36 +184,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) ...@@ -162,36 +184,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
* run the check_tx_hang logic with a transmit completion * run the check_tx_hang logic with a transmit completion
* pending but without time to complete it yet. * pending but without time to complete it yet.
*/ */
if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && if ((tx_done_old == tx_done) && tx_pending) {
(tx_pending >= I40E_MIN_DESC_PENDING)) {
/* make sure it is true for two checks in a row */ /* make sure it is true for two checks in a row */
ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
&tx_ring->state); &tx_ring->state);
} else if (!(tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) || } else if (tx_done_old == tx_done &&
!(tx_pending < I40E_MIN_DESC_PENDING) || (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
!(tx_pending > 0)) {
/* update completed stats and disarm the hang check */ /* update completed stats and disarm the hang check */
tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; tx_ring->tx_stats.tx_done_old = tx_done;
clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
} }
return ret; return ret;
} }
/**
* i40e_get_head - Retrieve head from head writeback
* @tx_ring: tx ring to fetch head of
*
* Returns value of Tx ring head based on value stored
* in head write-back location
**/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
return le32_to_cpu(*(volatile __le32 *)head);
}
#define WB_STRIDE 0x3 #define WB_STRIDE 0x3
/** /**
...@@ -1206,17 +1212,16 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, ...@@ -1206,17 +1212,16 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (err < 0) if (err < 0)
return err; return err;
if (protocol == htons(ETH_P_IP)) { iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
if (iph->version == 4) {
tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
iph->tot_len = 0; iph->tot_len = 0;
iph->check = 0; iph->check = 0;
tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
0, IPPROTO_TCP, 0); 0, IPPROTO_TCP, 0);
} else if (skb_is_gso_v6(skb)) { } else if (ipv6h->version == 6) {
ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
: ipv6_hdr(skb);
tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
ipv6h->payload_len = 0; ipv6h->payload_len = 0;
tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
...@@ -1274,13 +1279,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, ...@@ -1274,13 +1279,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
} }
} else if (tx_flags & I40E_TX_FLAGS_IPV6) { } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
if (tx_flags & I40E_TX_FLAGS_TSO) { *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (tx_flags & I40E_TX_FLAGS_TSO)
ip_hdr(skb)->check = 0; ip_hdr(skb)->check = 0;
} else {
*cd_tunneling |=
I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
}
} }
/* Now set the ctx descriptor fields */ /* Now set the ctx descriptor fields */
...@@ -1290,6 +1291,11 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, ...@@ -1290,6 +1291,11 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
((skb_inner_network_offset(skb) - ((skb_inner_network_offset(skb) -
skb_transport_offset(skb)) >> 1) << skb_transport_offset(skb)) >> 1) <<
I40E_TXD_CTX_QW0_NATLEN_SHIFT; I40E_TXD_CTX_QW0_NATLEN_SHIFT;
if (this_ip_hdr->version == 6) {
tx_flags &= ~I40E_TX_FLAGS_IPV4;
tx_flags |= I40E_TX_FLAGS_IPV6;
}
} else { } else {
network_hdr_len = skb_network_header_len(skb); network_hdr_len = skb_network_header_len(skb);
...@@ -1380,6 +1386,67 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, ...@@ -1380,6 +1386,67 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
} }
/**
* i40e_chk_linearize - Check if there are more than 8 fragments per packet
* @skb: send buffer
* @tx_flags: collected send information
* @hdr_len: size of the packet header
*
* Note: Our HW can't scatter-gather more than 8 fragments to build
* a packet on the wire and so we need to figure out the cases where we
* need to linearize the skb.
**/
static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
const u8 hdr_len)
{
struct skb_frag_struct *frag;
bool linearize = false;
unsigned int size = 0;
u16 num_frags;
u16 gso_segs;
num_frags = skb_shinfo(skb)->nr_frags;
gso_segs = skb_shinfo(skb)->gso_segs;
if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
u16 j = 1;
if (num_frags < (I40E_MAX_BUFFER_TXD))
goto linearize_chk_done;
/* try the simple math, if we have too many frags per segment */
if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
I40E_MAX_BUFFER_TXD) {
linearize = true;
goto linearize_chk_done;
}
frag = &skb_shinfo(skb)->frags[0];
size = hdr_len;
/* we might still have more fragments per segment */
do {
size += skb_frag_size(frag);
frag++; j++;
if (j == I40E_MAX_BUFFER_TXD) {
if (size < skb_shinfo(skb)->gso_size) {
linearize = true;
break;
}
j = 1;
size -= skb_shinfo(skb)->gso_size;
if (size)
j++;
size += hdr_len;
}
num_frags--;
} while (num_frags);
} else {
if (num_frags >= I40E_MAX_BUFFER_TXD)
linearize = true;
}
linearize_chk_done:
return linearize;
}
/** /**
* i40e_tx_map - Build the Tx descriptor * i40e_tx_map - Build the Tx descriptor
* @tx_ring: ring to send buffer on * @tx_ring: ring to send buffer on
...@@ -1654,6 +1721,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, ...@@ -1654,6 +1721,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
else if (tso) else if (tso)
tx_flags |= I40E_TX_FLAGS_TSO; tx_flags |= I40E_TX_FLAGS_TSO;
if (i40e_chk_linearize(skb, tx_flags, hdr_len))
if (skb_linearize(skb))
goto out_drop;
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
/* always enable CRC insertion offload */ /* always enable CRC insertion offload */
......
...@@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { ...@@ -112,6 +112,7 @@ enum i40e_dyn_idx_t {
#define i40e_rx_desc i40e_32byte_rx_desc #define i40e_rx_desc i40e_32byte_rx_desc
#define I40E_MAX_BUFFER_TXD 8
#define I40E_MIN_TX_LEN 17 #define I40E_MIN_TX_LEN 17
#define I40E_MAX_DATA_PER_TXD 8192 #define I40E_MAX_DATA_PER_TXD 8192
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册