提交 117ce394 编写于 作者: David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-12-01

This series contains updates to i40e and i40evf only.

Helin adds new fields to i40e_vsi to store the user-configured RSS data,
along with the code that uses them.  Also renames RSS items to make their
function and scope clearer to users.  Fixes a confusing kernel message about
the enabled RSS size by reporting it together with the hardware maximum RSS
size.

Anjali fixes an issue where forcing writeback too often kept us from
benefiting from NAPI.

Jesse adds a prefetch of the packet data early in the transmit path, which
helps immensely for pktgen and forwarding workloads.  Also fixes the i40e
driver possibly sleeping inside a critical section of code.

Carolyn fixes an issue where adminq init failures always reported that the
NVM was newer than expected, even though that is not the cause of every
init_adminq failure.  The fix checks for that specific error condition and
prints a different, more helpful message otherwise.

Mitch fixes an error message so that it tells the user which VF is being
naughty, rather than making them guess.  Also changes the queue_vector array
from a statically-sized member of the adapter structure to a
dynamically-allocated and -sized array.  This reduces the size of the adapter
structure and allows us to support any number of queue vectors in the future
without changing the code.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -266,7 +266,7 @@ struct i40e_pf {
u16 num_lan_qps; /* num lan queues this PF has set up */
u16 num_lan_msix; /* num queue vectors for the base PF vsi */
int queues_left; /* queues left unclaimed */
u16 rss_size; /* num queues in the RSS array */
u16 alloc_rss_size; /* allocated RSS queues */
u16 rss_size_max; /* HW defined max RSS queues */
u16 fdir_pf_filter_count; /* num of guaranteed filters for this PF */
u16 num_alloc_vsi; /* num VSIs this driver supports */
......@@ -413,7 +413,7 @@ struct i40e_pf {
u32 rx_hwtstamp_cleared;
bool ptp_tx;
bool ptp_rx;
u16 rss_table_size;
u16 rss_table_size; /* HW RSS table size */
/* These are only valid in NPAR modes */
u32 npar_max_bw;
u32 npar_min_bw;
......@@ -506,8 +506,10 @@ struct i40e_vsi {
u16 tx_itr_setting;
u16 int_rate_limit; /* value in usecs */
u16 rss_table_size;
u16 rss_size;
u16 rss_table_size; /* HW RSS table size */
u16 rss_size; /* Allocated RSS queues */
u8 *rss_hkey_user; /* User configured hash keys */
u8 *rss_lut_user; /* User configured lookup table entries */
u16 max_frame;
u16 rx_hdr_len;
......@@ -695,7 +697,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
bool is_vf, bool is_netdev);
void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan,
bool is_vf, bool is_netdev);
int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl);
int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
u16 uplink, u32 param1);
int i40e_vsi_release(struct i40e_vsi *vsi);
......
......@@ -1138,7 +1138,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
spin_lock_bh(&vsi->mac_filter_list_lock);
f = i40e_add_filter(vsi, ma, vlan, false, false);
spin_unlock_bh(&vsi->mac_filter_list_lock);
ret = i40e_sync_vsi_filters(vsi, true);
ret = i40e_sync_vsi_filters(vsi);
if (f && !ret)
dev_info(&pf->pdev->dev,
"add macaddr: %pM vlan=%d added to VSI %d\n",
......@@ -1177,7 +1177,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
spin_lock_bh(&vsi->mac_filter_list_lock);
i40e_del_filter(vsi, ma, vlan, false, false);
spin_unlock_bh(&vsi->mac_filter_list_lock);
ret = i40e_sync_vsi_filters(vsi, true);
ret = i40e_sync_vsi_filters(vsi);
if (!ret)
dev_info(&pf->pdev->dev,
"del macaddr: %pM vlan=%d removed from VSI %d\n",
......
......@@ -2651,10 +2651,8 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
u8 seed_def[I40E_HKEY_ARRAY_SIZE];
u8 *lut, *seed = NULL;
u8 *seed = NULL;
u16 i;
int ret;
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
......@@ -2663,18 +2661,27 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
return 0;
if (key) {
memcpy(seed_def, key, I40E_HKEY_ARRAY_SIZE);
seed = seed_def;
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE,
GFP_KERNEL);
if (!vsi->rss_hkey_user)
return -ENOMEM;
}
memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE);
seed = vsi->rss_hkey_user;
}
lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
if (!lut)
return -ENOMEM;
if (!vsi->rss_lut_user) {
vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
if (!vsi->rss_lut_user)
return -ENOMEM;
}
/* Each 32 bits pointed by 'indir' is stored with a lut entry */
for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
lut[i] = (u8)(indir[i]);
ret = i40e_config_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE);
kfree(lut);
vsi->rss_lut_user[i] = (u8)(indir[i]);
return ret;
return i40e_config_rss(vsi, seed, vsi->rss_lut_user,
I40E_HLUT_ARRAY_SIZE);
}
/**
......
......@@ -39,7 +39,7 @@ static const char i40e_driver_string[] =
#define DRV_VERSION_MAJOR 1
#define DRV_VERSION_MINOR 4
#define DRV_VERSION_BUILD 2
#define DRV_VERSION_BUILD 4
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
......@@ -1552,9 +1552,11 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
spin_unlock_bh(&vsi->mac_filter_list_lock);
}
i40e_sync_vsi_filters(vsi, false);
ether_addr_copy(netdev->dev_addr, addr->sa_data);
/* schedule our worker thread which will take care of
* applying the new filter changes
*/
i40e_service_event_schedule(vsi->back);
return 0;
}
......@@ -1630,7 +1632,8 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
switch (vsi->type) {
case I40E_VSI_MAIN:
qcount = min_t(int, pf->rss_size, num_tc_qps);
qcount = min_t(int, pf->alloc_rss_size,
num_tc_qps);
break;
#ifdef I40E_FCOE
case I40E_VSI_FCOE:
......@@ -1856,13 +1859,12 @@ static void i40e_cleanup_add_list(struct list_head *add_list)
/**
* i40e_sync_vsi_filters - Update the VSI filter list to the HW
* @vsi: ptr to the VSI
* @grab_rtnl: whether RTNL needs to be grabbed
*
* Push any outstanding VSI filter changes through the AdminQ.
*
* Returns 0 or error value
**/
int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
{
struct list_head tmp_del_list, tmp_add_list;
struct i40e_mac_filter *f, *ftmp, *fclone;
......@@ -2117,12 +2119,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
*/
if (pf->cur_promisc != cur_promisc) {
pf->cur_promisc = cur_promisc;
if (grab_rtnl)
i40e_do_reset_safe(pf,
BIT(__I40E_PF_RESET_REQUESTED));
else
i40e_do_reset(pf,
BIT(__I40E_PF_RESET_REQUESTED));
set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
}
} else {
ret = i40e_aq_set_vsi_unicast_promiscuous(
......@@ -2171,8 +2168,15 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
for (v = 0; v < pf->num_alloc_vsi; v++) {
if (pf->vsi[v] &&
(pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED))
i40e_sync_vsi_filters(pf->vsi[v], true);
(pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) {
int ret = i40e_sync_vsi_filters(pf->vsi[v]);
if (ret) {
/* come back and try again later */
pf->flags |= I40E_FLAG_FILTER_SYNC;
break;
}
}
}
}
......@@ -2382,16 +2386,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
}
}
/* Make sure to release before sync_vsi_filter because that
* function will lock/unlock as necessary
*/
spin_unlock_bh(&vsi->mac_filter_list_lock);
if (test_bit(__I40E_DOWN, &vsi->back->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
return 0;
return i40e_sync_vsi_filters(vsi, false);
/* schedule our worker thread which will take care of
* applying the new filter changes
*/
i40e_service_event_schedule(vsi->back);
return 0;
}
/**
......@@ -2464,16 +2465,13 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
}
}
/* Make sure to release before sync_vsi_filter because that
* function with lock/unlock as necessary
*/
spin_unlock_bh(&vsi->mac_filter_list_lock);
if (test_bit(__I40E_DOWN, &vsi->back->state) ||
test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
return 0;
return i40e_sync_vsi_filters(vsi, false);
/* schedule our worker thread which will take care of
* applying the new filter changes
*/
i40e_service_event_schedule(vsi->back);
return 0;
}
/**
......@@ -2716,6 +2714,11 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
netif_set_xps_queue(ring->netdev, mask, ring->queue_index);
free_cpumask_var(mask);
}
/* schedule our worker thread which will take care of
* applying the new filter changes
*/
i40e_service_event_schedule(vsi->back);
}
/**
......@@ -7300,6 +7303,23 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
vsi->rx_rings = NULL;
}
/**
 * i40e_clear_rss_config_user - drop the user configured RSS settings
 * @vsi: Pointer to VSI structure (a NULL pointer is ignored)
 *
 * Frees the user configured lookup table and hash key buffers attached
 * to @vsi and resets both pointers to NULL so they are not freed twice
 * or read after release.
 */
static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
{
	if (vsi) {
		/* lookup table entries supplied by the user */
		kfree(vsi->rss_lut_user);
		vsi->rss_lut_user = NULL;

		/* hash key supplied by the user */
		kfree(vsi->rss_hkey_user);
		vsi->rss_hkey_user = NULL;
	}
}
/**
* i40e_vsi_clear - Deallocate the VSI provided
* @vsi: the VSI being un-configured
......@@ -7337,6 +7357,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
i40e_vsi_free_arrays(vsi, true);
i40e_clear_rss_config_user(vsi);
pf->vsi[vsi->idx] = NULL;
if (vsi->idx < pf->next_vsi)
......@@ -7865,7 +7886,7 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs);
vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs);
ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
kfree(lut);
......@@ -8015,8 +8036,6 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs);
/* Determine the RSS table size based on the hardware capabilities */
reg_val = rd32(hw, I40E_PFQF_CTL_0);
reg_val = (pf->rss_table_size == 512) ?
......@@ -8024,15 +8043,29 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
wr32(hw, I40E_PFQF_CTL_0, reg_val);
/* Determine the RSS size of the VSI */
if (!vsi->rss_size)
vsi->rss_size = min_t(int, pf->alloc_rss_size,
vsi->num_queue_pairs);
lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
return -ENOMEM;
i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
/* Use user configured lut if there is one, otherwise use default */
if (vsi->rss_lut_user)
memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
else
i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
/* Use user configured hash key if there is one, otherwise
* use default.
*/
if (vsi->rss_hkey_user)
memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
else
netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
kfree(lut);
return ret;
......@@ -8060,13 +8093,28 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
vsi->req_queue_pairs = queue_count;
i40e_prep_for_reset(pf);
pf->rss_size = new_rss_size;
pf->alloc_rss_size = new_rss_size;
i40e_reset_and_rebuild(pf, true);
/* Discard the user configured hash keys and lut, if less
* queues are enabled.
*/
if (queue_count < vsi->rss_size) {
i40e_clear_rss_config_user(vsi);
dev_dbg(&pf->pdev->dev,
"discard user configured hash keys and lut\n");
}
/* Reset vsi->rss_size, as number of enabled queues changed */
vsi->rss_size = min_t(int, pf->alloc_rss_size,
vsi->num_queue_pairs);
i40e_pf_config_rss(pf);
}
dev_info(&pf->pdev->dev, "RSS count: %d\n", pf->rss_size);
return pf->rss_size;
dev_info(&pf->pdev->dev, "RSS count/HW max RSS count: %d/%d\n",
pf->alloc_rss_size, pf->rss_size_max);
return pf->alloc_rss_size;
}
/**
......@@ -8237,13 +8285,14 @@ static int i40e_sw_init(struct i40e_pf *pf)
* maximum might end up larger than the available queues
*/
pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
pf->rss_size = 1;
pf->alloc_rss_size = 1;
pf->rss_table_size = pf->hw.func_caps.rss_table_size;
pf->rss_size_max = min_t(int, pf->rss_size_max,
pf->hw.func_caps.num_tx_qp);
if (pf->hw.func_caps.rss) {
pf->flags |= I40E_FLAG_RSS_ENABLED;
pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus());
pf->alloc_rss_size = min_t(int, pf->rss_size_max,
num_online_cpus());
}
/* MFP mode enabled */
......@@ -9176,7 +9225,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
f->is_vf, f->is_netdev);
spin_unlock_bh(&vsi->mac_filter_list_lock);
i40e_sync_vsi_filters(vsi, false);
i40e_sync_vsi_filters(vsi);
i40e_vsi_delete(vsi);
i40e_vsi_free_q_vectors(vsi);
......@@ -10110,7 +10159,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
!(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
/* one qp for PF, no queues for anything else */
queues_left = 0;
pf->rss_size = pf->num_lan_qps = 1;
pf->alloc_rss_size = pf->num_lan_qps = 1;
/* make sure all the fancies are disabled */
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
......@@ -10127,7 +10176,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_DCB_CAPABLE))) {
/* one qp for PF */
pf->rss_size = pf->num_lan_qps = 1;
pf->alloc_rss_size = pf->num_lan_qps = 1;
queues_left -= pf->num_lan_qps;
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
......@@ -10197,8 +10246,9 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
pf->hw.func_caps.num_tx_qp,
!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps,
pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left);
pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
queues_left);
#ifdef I40E_FCOE
dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
#endif
......@@ -10424,6 +10474,16 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->hw.fc.requested_mode = I40E_FC_NONE;
err = i40e_init_adminq(hw);
if (err) {
if (err == I40E_ERR_FIRMWARE_API_VERSION)
dev_info(&pdev->dev,
"The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
else
dev_info(&pdev->dev,
"The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
goto err_pf_reset;
}
/* provide nvm, fw, api versions */
dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
......@@ -10431,12 +10491,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->aq.api_maj_ver, hw->aq.api_min_ver,
i40e_nvm_version_str(hw));
if (err) {
dev_info(&pdev->dev,
"The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
goto err_pf_reset;
}
if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
dev_info(&pdev->dev,
......
......@@ -2806,6 +2806,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
int tsyn;
int tso;
/* prefetch the data, we'll need it later */
prefetch(skb->data);
if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
return NETDEV_TX_BUSY;
......
......@@ -565,7 +565,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
}
/* program mac filter */
ret = i40e_sync_vsi_filters(vsi, false);
ret = i40e_sync_vsi_filters(vsi);
if (ret)
dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
......@@ -1094,8 +1094,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
/* single place to detect unsuccessful return values */
if (v_retval) {
vf->num_invalid_msgs++;
dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n",
v_opcode, v_retval);
dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n",
vf->vf_id, v_opcode, v_retval);
if (vf->num_invalid_msgs >
I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
dev_err(&pf->pdev->dev,
......@@ -1633,7 +1633,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
spin_unlock_bh(&vsi->mac_filter_list_lock);
/* program the updated filter list */
if (i40e_sync_vsi_filters(vsi, false))
if (i40e_sync_vsi_filters(vsi))
dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters\n",
vf->vf_id);
......@@ -1687,7 +1687,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
spin_unlock_bh(&vsi->mac_filter_list_lock);
/* program the updated filter list */
if (i40e_sync_vsi_filters(vsi, false))
if (i40e_sync_vsi_filters(vsi))
dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters\n",
vf->vf_id);
......@@ -2102,7 +2102,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
/* program mac filter */
if (i40e_sync_vsi_filters(vsi, false)) {
if (i40e_sync_vsi_filters(vsi)) {
dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
ret = -EIO;
goto error_param;
......
......@@ -245,16 +245,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
/* check to see if there are any non-cache aligned descriptors
* waiting to be written back, and kick the hardware to force
* them to be written back in case of napi polling
*/
if (budget &&
!((i & WB_STRIDE) == WB_STRIDE) &&
!test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
(I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
tx_ring->arm_wb = true;
netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index),
total_packets, total_bytes);
......@@ -1770,6 +1760,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
u32 td_tag = 0;
dma_addr_t dma;
u16 gso_segs;
u16 desc_count = 0;
bool tail_bump = true;
bool do_rs = false;
if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
......@@ -1810,6 +1803,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
desc_count++;
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
......@@ -1829,6 +1824,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_desc++;
i++;
desc_count++;
if (i == tx_ring->count) {
tx_desc = I40E_TX_DESC(tx_ring, 0);
i = 0;
......@@ -1843,35 +1840,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_bi = &tx_ring->tx_bi[i];
}
/* Place RS bit on last descriptor of any packet that spans across the
* 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
*/
#define WB_STRIDE 0x3
if (((i & WB_STRIDE) != WB_STRIDE) &&
(first <= &tx_ring->tx_bi[i]) &&
(first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, td_offset, size, td_tag) |
cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
I40E_TXD_QW1_CMD_SHIFT);
} else {
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, td_offset, size, td_tag) |
cpu_to_le64((u64)I40E_TXD_CMD <<
I40E_TXD_QW1_CMD_SHIFT);
}
netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index),
first->bytecount);
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
/* set next_to_watch value indicating a packet is present */
first->next_to_watch = tx_desc;
......@@ -1881,15 +1850,78 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
tx_ring->next_to_use = i;
netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index),
first->bytecount);
i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED);
/* Algorithm to optimize tail and RS bit setting:
* if xmit_more is supported
* if xmit_more is true
* do not update tail and do not mark RS bit.
* if xmit_more is false and last xmit_more was false
* if every packet spanned less than 4 desc
* then set RS bit on 4th packet and update tail
* on every packet
* else
* update tail and set RS bit on every packet.
* if xmit_more is false and last_xmit_more was true
* update tail and set RS bit.
* else (kernel < 3.18)
* if every packet spanned less than 4 desc
* then set RS bit on 4th packet and update tail
* on every packet
* else
* set RS bit on EOP for every packet and update tail
*
* Optimization: wmb to be issued only in case of tail update.
* Also optimize the Descriptor WB path for RS bit with the same
* algorithm.
*
* Note: If there are less than 4 packets
* pending and interrupts were disabled the service task will
* trigger a force WB.
*/
if (skb->xmit_more &&
!netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index))) {
tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
tail_bump = false;
} else if (!skb->xmit_more &&
!netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index)) &&
(!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
(tx_ring->packet_stride < WB_STRIDE) &&
(desc_count < WB_STRIDE)) {
tx_ring->packet_stride++;
} else {
tx_ring->packet_stride = 0;
tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
do_rs = true;
}
if (do_rs)
tx_ring->packet_stride = 0;
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, td_offset, size, td_tag) |
cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
I40E_TX_DESC_CMD_EOP) <<
I40E_TXD_QW1_CMD_SHIFT);
/* notify HW of packet */
if (!skb->xmit_more ||
netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
tx_ring->queue_index)))
writel(i, tx_ring->tail);
else
if (!tail_bump)
prefetchw(tx_desc + 1);
if (tail_bump) {
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
writel(i, tx_ring->tail);
}
return;
dma_error:
......@@ -1961,6 +1993,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
u8 hdr_len = 0;
int tso;
/* prefetch the data, we'll need it later */
prefetch(skb->data);
if (0 == i40evf_xmit_descriptor_count(skb, tx_ring))
return NETDEV_TX_BUSY;
......@@ -2028,7 +2063,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping];
struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
/* hardware can't handle really short frames, hardware padding works
* beyond this point
......
......@@ -268,6 +268,8 @@ struct i40e_ring {
bool ring_active; /* is ring online or not */
bool arm_wb; /* do something to arm write back */
u8 packet_stride;
#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
......
......@@ -67,6 +67,8 @@ struct i40e_vsi {
u16 rx_itr_setting;
u16 tx_itr_setting;
u16 qs_handle;
u8 *rss_hkey_user; /* User configured hash keys */
u8 *rss_lut_user; /* User configured lookup table entries */
};
/* How many Rx Buffers do we bundle into one write to the hardware ? */
......@@ -99,6 +101,7 @@ struct i40e_vsi {
#define MAX_TX_QUEUES MAX_RX_QUEUES
#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
/* MAX_MSIX_Q_VECTORS of these are allocated,
* but we only use one per queue-specific vector.
......@@ -142,9 +145,6 @@ struct i40e_q_vector {
#define OTHER_VECTOR 1
#define NONQ_VECS (OTHER_VECTOR)
#define MAX_MSIX_Q_VECTORS 4
#define MAX_MSIX_COUNT 5
#define MIN_MSIX_Q_VECTORS 1
#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS)
......@@ -190,19 +190,19 @@ struct i40evf_adapter {
struct work_struct reset_task;
struct work_struct adminq_task;
struct delayed_work init_task;
struct i40e_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
struct i40e_q_vector *q_vectors;
struct list_head vlan_filter_list;
char misc_vector_name[IFNAMSIZ + 9];
int num_active_queues;
/* TX */
struct i40e_ring *tx_rings[I40E_MAX_VSI_QP];
struct i40e_ring *tx_rings;
u32 tx_timeout_count;
struct list_head mac_filter_list;
u32 tx_desc_count;
/* RX */
struct i40e_ring *rx_rings[I40E_MAX_VSI_QP];
struct i40e_ring *rx_rings;
u64 hw_csum_rx_error;
u32 rx_desc_count;
int num_msix_vectors;
......@@ -313,4 +313,8 @@ void i40evf_request_reset(struct i40evf_adapter *adapter);
void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
enum i40e_virtchnl_ops v_opcode,
i40e_status v_retval, u8 *msg, u16 msglen);
int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut,
u16 lut_size);
int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut,
u16 lut_size);
#endif /* _I40EVF_H_ */
......@@ -121,12 +121,12 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
data[i] = *(u64 *)p;
}
for (j = 0; j < adapter->num_active_queues; j++) {
data[i++] = adapter->tx_rings[j]->stats.packets;
data[i++] = adapter->tx_rings[j]->stats.bytes;
data[i++] = adapter->tx_rings[j].stats.packets;
data[i++] = adapter->tx_rings[j].stats.bytes;
}
for (j = 0; j < adapter->num_active_queues; j++) {
data[i++] = adapter->rx_rings[j]->stats.packets;
data[i++] = adapter->rx_rings[j]->stats.bytes;
data[i++] = adapter->rx_rings[j].stats.packets;
data[i++] = adapter->rx_rings[j].stats.bytes;
}
}
......@@ -351,7 +351,7 @@ static int i40evf_set_coalesce(struct net_device *netdev,
vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
q_vector = adapter->q_vector[i];
q_vector = &adapter->q_vectors[i];
q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr);
q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
......@@ -634,25 +634,34 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
struct i40e_hw *hw = &adapter->hw;
u32 hlut_val;
int i, j;
struct i40e_vsi *vsi = &adapter->vsi;
u8 *seed = NULL, *lut;
int ret;
u16 i;
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP;
if (!indir)
return 0;
if (indir) {
for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
hlut_val = rd32(hw, I40E_VFQF_HLUT(i));
indir[j++] = hlut_val & 0xff;
indir[j++] = (hlut_val >> 8) & 0xff;
indir[j++] = (hlut_val >> 16) & 0xff;
indir[j++] = (hlut_val >> 24) & 0xff;
}
}
return 0;
seed = key;
lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL);
if (!lut)
return -ENOMEM;
ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE);
if (ret)
goto out;
/* Each 32 bits pointed by 'indir' is stored with a lut entry */
for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++)
indir[i] = (u32)lut[i];
out:
kfree(lut);
return ret;
}
/**
......@@ -668,9 +677,9 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
struct i40e_hw *hw = &adapter->hw;
u32 hlut_val;
int i, j;
struct i40e_vsi *vsi = &adapter->vsi;
u8 *seed = NULL;
u16 i;
/* We do not allow change in unsupported parameters */
if (key ||
......@@ -679,15 +688,29 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
if (!indir)
return 0;
for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
hlut_val = indir[j++];
hlut_val |= indir[j++] << 8;
hlut_val |= indir[j++] << 16;
hlut_val |= indir[j++] << 24;
wr32(hw, I40E_VFQF_HLUT(i), hlut_val);
if (key) {
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE,
GFP_KERNEL);
if (!vsi->rss_hkey_user)
return -ENOMEM;
}
memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE);
seed = vsi->rss_hkey_user;
}
if (!vsi->rss_lut_user) {
vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE,
GFP_KERNEL);
if (!vsi->rss_lut_user)
return -ENOMEM;
}
return 0;
/* Each 32 bits pointed by 'indir' is stored with a lut entry */
for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++)
vsi->rss_lut_user[i] = (u8)(indir[i]);
return i40evf_config_rss(vsi, seed, vsi->rss_lut_user,
I40EVF_HLUT_ARRAY_SIZE);
}
static const struct ethtool_ops i40evf_ethtool_ops = {
......
......@@ -255,19 +255,19 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
for (i = 0; i < pairs; i++) {
vqpi->txq.vsi_id = vqci->vsi_id;
vqpi->txq.queue_id = i;
vqpi->txq.ring_len = adapter->tx_rings[i]->count;
vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma;
vqpi->txq.ring_len = adapter->tx_rings[i].count;
vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma;
vqpi->txq.headwb_enabled = 1;
vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr +
(vqpi->txq.ring_len * sizeof(struct i40e_tx_desc));
vqpi->rxq.vsi_id = vqci->vsi_id;
vqpi->rxq.queue_id = i;
vqpi->rxq.ring_len = adapter->rx_rings[i]->count;
vqpi->rxq.dma_ring_addr = adapter->rx_rings[i]->dma;
vqpi->rxq.ring_len = adapter->rx_rings[i].count;
vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma;
vqpi->rxq.max_pkt_size = adapter->netdev->mtu
+ ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
vqpi->rxq.databuffer_size = adapter->rx_rings[i]->rx_buf_len;
vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len;
vqpi++;
}
......@@ -360,7 +360,7 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
vimi->num_vectors = adapter->num_msix_vectors;
/* Queue vectors first */
for (v_idx = 0; v_idx < q_vectors; v_idx++) {
q_vector = adapter->q_vector[v_idx];
q_vector = adapter->q_vectors + v_idx;
vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册