提交 fbc7b27a 编写于 作者: K Kiran Patil 提交者: Tony Nguyen

ice: enable ndo_setup_tc support for mqprio_qdisc

Add support in driver for TC_QDISC_SETUP_MQPRIO. This support
enables instantiation of channels in HW using existing MQPRIO
infrastructure which is extended to be offloadable. This
provides a mechanism to configure dedicated set of queues for
each TC.

Configuring channels using "tc mqprio":
--------------------------------------
tc qdisc add dev <ethX> root mqprio num_tc 3 map 0 1 2 \
	queues 4@0 4@4 4@8  hw 1 mode channel

Above command configures 3 TCs having 4 queues each. "hw 1 mode channel"
implies offload of channel configuration to HW. When driver processes
configuration received via "ndo_setup_tc: QDISC_SETUP_MQPRIO", each
TC maps to HW VSI with specified queues.

User can optionally specify bandwidth min and max rate limit per TC
(see example below). If shaper params like min and/or max bandwidth
rate limit are specified, driver configures VSI specific rate limiter
in HW.

Configuring channels and bandwidth shaper parameters using "tc mqprio":
----------------------------------------------------------------
tc qdisc add dev <ethX> root mqprio \
	num_tc 4 map 0 1 2 3 queues 4@0 4@4 4@8 4@12 hw 1 mode channel \
	shaper bw_rlimit min_rate 1Gbit 2Gbit 3Gbit 4Gbit \
	max_rate 4Gbit 5Gbit 6Gbit 7Gbit

Command to view configured TCs:
-----------------------------
tc qdisc show dev <ethX>

Deleting TCs:
------------
tc qdisc del dev <ethX> root mqprio
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
上级 0754d65b
......@@ -56,6 +56,7 @@
#include "ice_dcb.h"
#include "ice_switch.h"
#include "ice_common.h"
#include "ice_flow.h"
#include "ice_sched.h"
#include "ice_idc_int.h"
#include "ice_virtchnl_pf.h"
......@@ -126,6 +127,13 @@
#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
/* Minimum BW limit is 500 Kbps for any scheduler node */
#define ICE_MIN_BW_LIMIT 500
/* User can specify BW in Kbit/Mbit/Gbit and the OS converts it to bytes/s
* before handing it to the driver. Divide the bytes/s value by this divisor
* to get the user-specified BW limit in Kbps.
*/
#define ICE_BW_KBPS_DIVISOR 125
/* Macro for each VSI in a PF */
#define ice_for_each_vsi(pf, i) \
for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
......@@ -440,6 +448,8 @@ struct ice_q_vector {
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
struct ice_channel *ch;
char name[ICE_INT_NAME_STR_LEN];
u16 total_events; /* net_dim(): number of interrupts processed */
......@@ -595,6 +605,17 @@ struct ice_netdev_priv {
struct ice_repr *repr;
};
/**
 * ice_vector_ch_enabled - check whether a q_vector has a channel attached
 * @qv: pointer to q_vector, can be NULL
 *
 * Return: true if @qv is non-NULL and its ch pointer is set, false otherwise.
 */
static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv)
{
	/* @qv is documented as possibly NULL, so guard the dereference */
	return qv && qv->ch; /* Enable it to run with TC */
}
/**
* ice_irq_dynamic_ena - Enable default interrupt generation settings
* @hw: pointer to HW struct
......
......@@ -3194,6 +3194,11 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
return -EIO;
}
if (ice_is_adq_active(pf)) {
netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n");
return -EOPNOTSUPP;
}
if (key) {
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user =
......@@ -3404,6 +3409,11 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
return -EINVAL;
if (ice_is_adq_active(pf)) {
netdev_err(dev, "Cannot set channels with ADQ configured.\n");
return -EOPNOTSUPP;
}
if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
return -EOPNOTSUPP;
......
......@@ -3954,7 +3954,7 @@ int ice_get_link_speed_mbps(struct ice_vsi *vsi)
*
* Return current VSI link speed and 0 if the speed is unknown.
*/
static int ice_get_link_speed_kbps(struct ice_vsi *vsi)
int ice_get_link_speed_kbps(struct ice_vsi *vsi)
{
int speed_mbps;
......
......@@ -124,6 +124,7 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_clear_dflt_vsi(struct ice_sw *sw);
int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
int ice_get_link_speed_kbps(struct ice_vsi *vsi);
int ice_get_link_speed_mbps(struct ice_vsi *vsi);
int
ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));
......
......@@ -55,6 +55,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
static void ice_vsi_release_all(struct ice_pf *pf);
static int ice_rebuild_channels(struct ice_pf *pf);
static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
bool netif_is_ice(struct net_device *dev)
{
return dev && (dev->netdev_ops == &ice_netdev_ops);
......@@ -106,7 +109,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
ice_for_each_txq(vsi, i) {
struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
if (tx_ring && tx_ring->desc) {
if (!tx_ring)
continue;
if (ice_ring_ch_enabled(tx_ring))
continue;
if (tx_ring->desc) {
/* If packet counter has not changed the queue is
* likely stalled, so force an interrupt for this
* queue.
......@@ -458,17 +466,21 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
}
/**
* ice_prepare_for_reset - prep for the core to reset
* ice_prepare_for_reset - prep for reset
* @pf: board private structure
* @reset_type: reset type requested
*
* Inform or close all dependent features in prep for reset.
*/
static void
ice_prepare_for_reset(struct ice_pf *pf)
ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct ice_hw *hw = &pf->hw;
struct ice_vsi *vsi;
unsigned int i;
dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
/* already prepared for reset */
if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
return;
......@@ -483,6 +495,38 @@ ice_prepare_for_reset(struct ice_pf *pf)
ice_for_each_vf(pf, i)
ice_set_vf_state_qs_dis(&pf->vf[i]);
/* release ADQ specific HW and SW resources */
vsi = ice_get_main_vsi(pf);
if (!vsi)
goto skip;
/* to be on safe side, reset orig_rss_size so that normal flow
* of deciding rss_size can take precedence
*/
vsi->orig_rss_size = 0;
if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
if (reset_type == ICE_RESET_PFR) {
vsi->old_ena_tc = vsi->all_enatc;
vsi->old_numtc = vsi->all_numtc;
} else {
ice_remove_q_channels(vsi, true);
/* for other reset type, do not support channel rebuild
* hence reset needed info
*/
vsi->old_ena_tc = 0;
vsi->all_enatc = 0;
vsi->old_numtc = 0;
vsi->all_numtc = 0;
vsi->req_txq = 0;
vsi->req_rxq = 0;
clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
}
}
skip:
/* clear SW filtering DB */
ice_clear_hw_tbls(hw);
/* disable the VSIs and their queues that are not already DOWN */
......@@ -502,8 +546,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
/**
* ice_do_reset - Initiate one of many types of resets
* @pf: board private structure
* @reset_type: reset type requested
* before this function was called.
* @reset_type: reset type requested before this function was called.
*/
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
......@@ -512,7 +555,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
ice_prepare_for_reset(pf);
ice_prepare_for_reset(pf, reset_type);
/* trigger the reset */
if (ice_reset(hw, reset_type)) {
......@@ -570,7 +613,7 @@ static void ice_reset_subtask(struct ice_pf *pf)
/* return if no valid reset type requested */
if (reset_type == ICE_RESET_INVAL)
return;
ice_prepare_for_reset(pf);
ice_prepare_for_reset(pf, reset_type);
/* make sure we are ready to rebuild */
if (ice_check_reset(&pf->hw)) {
......@@ -3243,6 +3286,13 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL);
}
/**
 * ice_chnl_vsi_setup - Set up a channel VSI
 * @pf: board private structure
 * @pi: port info handed through to ice_vsi_setup()
 * @ch: ptr to channel structure handed through to ice_vsi_setup()
 *
 * Thin wrapper around ice_vsi_setup() that requests a VSI of type
 * ICE_VSI_CHNL with ICE_INVAL_VFID as the VF ID.
 *
 * Return: whatever ice_vsi_setup() returns.
 */
static struct ice_vsi *
ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
		   struct ice_channel *ch)
{
	struct ice_vsi *chnl_vsi;

	chnl_vsi = ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch);

	return chnl_vsi;
}
/**
* ice_ctrl_vsi_setup - Set up a control VSI
* @pf: board private structure
......@@ -3361,6 +3411,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
if (!vsi)
return -ENOMEM;
/* init channel list */
INIT_LIST_HEAD(&vsi->ch_list);
status = ice_cfg_netdev(vsi);
if (status) {
status = -ENODEV;
......@@ -4956,7 +5009,7 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
set_bit(ICE_PFR_REQ, pf->state);
ice_prepare_for_reset(pf);
ice_prepare_for_reset(pf, ICE_RESET_PFR);
}
}
......@@ -5048,7 +5101,7 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
set_bit(ICE_PFR_REQ, pf->state);
ice_prepare_for_reset(pf);
ice_prepare_for_reset(pf, ICE_RESET_PFR);
}
}
}
......@@ -5453,6 +5506,11 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
ice_clear_arfs(vsi);
}
/* don't turn off hw_tc_offload when ADQ is already enabled */
if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
return -EACCES;
}
return ret;
}
......@@ -6307,6 +6365,9 @@ static void ice_vsi_release_all(struct ice_pf *pf)
if (!pf->vsi[i])
continue;
if (pf->vsi[i]->type == ICE_VSI_CHNL)
continue;
err = ice_vsi_release(pf->vsi[i]);
if (err)
dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
......@@ -6517,6 +6578,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
goto err_vsi_rebuild;
}
if (reset_type == ICE_RESET_PFR) {
err = ice_rebuild_channels(pf);
if (err) {
dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
err);
goto err_vsi_rebuild;
}
}
/* If Flow Director is active */
if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
......@@ -7183,6 +7253,799 @@ ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
}
}
/**
* ice_validate_mqprio_qopt - Validate TCF input parameters
* @vsi: Pointer to VSI
* @mqprio_qopt: input parameters for mqprio queue configuration
*
* This function validates MQPRIO params, such as qcount (power of 2 wherever
* needed), and make sure user doesn't specify qcount and BW rate limit
* for TCs, which are more than "num_tc"
*/
static int
ice_validate_mqprio_qopt(struct ice_vsi *vsi,
struct tc_mqprio_qopt_offload *mqprio_qopt)
{
u64 sum_max_rate = 0, sum_min_rate = 0;
int non_power_of_2_qcount = 0;
struct ice_pf *pf = vsi->back;
int max_rss_q_cnt = 0;
struct device *dev;
int i, speed;
u8 num_tc;
if (vsi->type != ICE_VSI_PF)
return -EINVAL;
if (mqprio_qopt->qopt.offset[0] != 0 ||
mqprio_qopt->qopt.num_tc < 1 ||
mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
return -EINVAL;
dev = ice_pf_to_dev(pf);
vsi->ch_rss_size = 0;
num_tc = mqprio_qopt->qopt.num_tc;
for (i = 0; num_tc; i++) {
int qcount = mqprio_qopt->qopt.count[i];
u64 max_rate, min_rate, rem;
if (!qcount)
return -EINVAL;
if (is_power_of_2(qcount)) {
if (non_power_of_2_qcount &&
qcount > non_power_of_2_qcount) {
dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
qcount, non_power_of_2_qcount);
return -EINVAL;
}
if (qcount > max_rss_q_cnt)
max_rss_q_cnt = qcount;
} else {
if (non_power_of_2_qcount &&
qcount != non_power_of_2_qcount) {
dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
qcount, non_power_of_2_qcount);
return -EINVAL;
}
if (qcount < max_rss_q_cnt) {
dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
qcount, max_rss_q_cnt);
return -EINVAL;
}
max_rss_q_cnt = qcount;
non_power_of_2_qcount = qcount;
}
/* TC command takes input in K/N/Gbps or K/M/Gbit etc but
* converts the bandwidth rate limit into Bytes/s when
* passing it down to the driver. So convert input bandwidth
* from Bytes/s to Kbps
*/
max_rate = mqprio_qopt->max_rate[i];
max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
sum_max_rate += max_rate;
/* min_rate is minimum guaranteed rate and it can't be zero */
min_rate = mqprio_qopt->min_rate[i];
min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
sum_min_rate += min_rate;
if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
min_rate, ICE_MIN_BW_LIMIT);
return -EINVAL;
}
iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
if (rem) {
dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
i, ICE_MIN_BW_LIMIT);
return -EINVAL;
}
iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
if (rem) {
dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
i, ICE_MIN_BW_LIMIT);
return -EINVAL;
}
/* min_rate can't be more than max_rate, except when max_rate
* is zero (implies max_rate sought is max line rate). In such
* a case min_rate can be more than max.
*/
if (max_rate && min_rate > max_rate) {
dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
min_rate, max_rate);
return -EINVAL;
}
if (i >= mqprio_qopt->qopt.num_tc - 1)
break;
if (mqprio_qopt->qopt.offset[i + 1] !=
(mqprio_qopt->qopt.offset[i] + qcount))
return -EINVAL;
}
if (vsi->num_rxq <
(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
return -EINVAL;
if (vsi->num_txq <
(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
return -EINVAL;
speed = ice_get_link_speed_kbps(vsi);
if (sum_max_rate && sum_max_rate > (u64)speed) {
dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
sum_max_rate, speed);
return -EINVAL;
}
if (sum_min_rate && sum_min_rate > (u64)speed) {
dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
sum_min_rate, speed);
return -EINVAL;
}
/* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
vsi->ch_rss_size = max_rss_q_cnt;
return 0;
}
/**
 * ice_add_channel - create the VSI that backs a channel
 * @pf: ptr to PF device
 * @sw_id: underlying HW switching element ID
 * @ch: ptr to channel structure
 *
 * Create a channel VSI through ice_chnl_vsi_setup() and cross-link it with
 * @ch: the channel records the VSI pointer, VSI number, switch ID and a copy
 * of the VSI's queue/TC mapping, and the VSI gets a back pointer to @ch.
 *
 * Return: 0 on success, -EINVAL if @ch is not of type ICE_VSI_CHNL or the
 * VSI could not be created.
 */
static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *chnl_vsi;

	if (ch->type != ICE_VSI_CHNL) {
		dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
		return -EINVAL;
	}

	chnl_vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
	if (chnl_vsi && chnl_vsi->type == ICE_VSI_CHNL) {
		/* link channel and VSI to each other */
		ch->ch_vsi = chnl_vsi;
		chnl_vsi->ch = ch;

		/* cache VSI identity and mapping info in the channel */
		ch->sw_id = sw_id;
		ch->vsi_num = chnl_vsi->vsi_num;
		ch->info.mapping_flags = chnl_vsi->info.mapping_flags;
		memcpy(&ch->info.q_mapping, &chnl_vsi->info.q_mapping,
		       sizeof(chnl_vsi->info.q_mapping));
		memcpy(&ch->info.tc_mapping, chnl_vsi->info.tc_mapping,
		       sizeof(chnl_vsi->info.tc_mapping));

		return 0;
	}

	dev_err(dev, "create chnl VSI failure\n");
	return -EINVAL;
}
/**
 * ice_chnl_cfg_res - attach rings and vectors of a VSI to a channel
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 *
 * Walk ch->num_txq queue pairs of @vsi starting at ch->base_q, point each
 * Tx/Rx ring and its q_vector at @ch, and write the stored ITR setting for
 * every ring container that is not in dynamic ITR mode. Queue pairs with a
 * missing Tx or Rx ring are skipped.
 */
static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	int idx;

	for (idx = 0; idx < ch->num_txq; idx++) {
		struct ice_tx_ring *txr = vsi->tx_rings[ch->base_q + idx];
		struct ice_rx_ring *rxr = vsi->rx_rings[ch->base_q + idx];
		struct ice_ring_container *itr_rc;
		struct ice_q_vector *qv;

		if (!txr || !rxr)
			continue;

		/* mark both rings as channel enabled */
		txr->ch = ch;
		rxr->ch = ch;

		/* Tx side vector: bind to channel, apply static ITR if any */
		qv = txr->q_vector;
		if (qv) {
			qv->ch = ch;
			itr_rc = &qv->tx;
			if (!ITR_IS_DYNAMIC(itr_rc))
				ice_write_itr(itr_rc, itr_rc->itr_setting);
		}

		/* Rx side vector: bind to channel, apply static ITR if any */
		qv = rxr->q_vector;
		if (qv) {
			qv->ch = ch;
			itr_rc = &qv->rx;
			if (!ITR_IS_DYNAMIC(itr_rc))
				ice_write_itr(itr_rc, itr_rc->itr_setting);
		}
	}

	/* a channel with queues may have had ITR registers written above,
	 * so flush to make sure the writes are posted
	 */
	if (ch->num_txq || ch->num_rxq)
		ice_flush(&vsi->back->hw);
}
/**
 * ice_cfg_chnl_all_res - configure channel resources
 * @vsi: ptr to main_vsi
 * @ch: ptr to channel structure
 *
 * Single entry point for configuring everything a channel needs on the main
 * VSI. At present this only delegates to ice_chnl_cfg_res(), which binds the
 * channel's rings and vectors and applies their ITR settings.
 */
static void
ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	/* queues, vectors and per-vector ITR settings of the channel */
	ice_chnl_cfg_res(vsi, ch);
}
/**
 * ice_setup_hw_channel - setup new channel
 * @pf: ptr to PF device
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 * @sw_id: underlying HW switching element ID
 * @type: type of channel to be created
 *
 * Place the channel at the parent VSI's next free base queue, create its
 * VSI via ice_add_channel(), bind the parent's rings/vectors to it and
 * advance vsi->next_base_q by the channel's Rx queue count.
 *
 * Return: 0 on success, or the error returned by ice_add_channel().
 */
static int
ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
		     struct ice_channel *ch, u16 sw_id, u8 type)
{
	struct device *dev = ice_pf_to_dev(pf);
	int err;

	ch->type = type;
	ch->base_q = vsi->next_base_q;

	err = ice_add_channel(pf, sw_id, ch);
	if (err) {
		dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
		return err;
	}

	/* bind rings, vectors and ITR settings to the new channel */
	ice_cfg_chnl_all_res(vsi, ch);

	/* the next channel's queue map starts right after this one */
	vsi->next_base_q += ch->num_rxq;

	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
		ch->num_rxq);

	return 0;
}
/**
 * ice_setup_channel - setup new channel using uplink element
 * @pf: ptr to PF device
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 *
 * Create a channel of type ICE_VSI_CHNL under a PF VSI, using the switch ID
 * of the PF's first switch element.
 *
 * Return: true if the channel was created and has a VSI attached, false if
 * @vsi is not a PF VSI or channel creation failed.
 */
static bool
ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
		  struct ice_channel *ch)
{
	struct device *dev = ice_pf_to_dev(pf);

	if (vsi->type != ICE_VSI_PF) {
		dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
		return false;
	}

	/* create channel (VSI) on the PF's first switch element */
	if (ice_setup_hw_channel(pf, vsi, ch, pf->first_sw->sw_id,
				 ICE_VSI_CHNL)) {
		dev_err(dev, "failed to setup hw_channel\n");
		return false;
	}
	dev_dbg(dev, "successfully created channel()\n");

	return !!ch->ch_vsi;
}
/**
 * ice_set_bw_limit - setup min and max BW limit for Tx traffic of a VSI
 * @vsi: VSI to be configured
 * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
 * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
 *
 * The minimum limit is applied first; the maximum limit is only attempted
 * when that succeeded.
 *
 * Return: 0 on success, otherwise the error from ice_set_min_bw_limit() or
 * ice_set_max_bw_limit().
 */
static int
ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
{
	int status = ice_set_min_bw_limit(vsi, min_tx_rate);

	if (!status)
		status = ice_set_max_bw_limit(vsi, max_tx_rate);

	return status;
}
/**
 * ice_create_q_channel - function to create channel
 * @vsi: VSI to be configured
 * @ch: ptr to channel (it contains channel specific params)
 *
 * Create the channel VSI for @ch on top of @vsi, apply the channel's Tx
 * min/max rate limit when one is set, and deduct the channel's Tx queues
 * from vsi->cnt_q_avail. A failure to apply the rate limit is logged but
 * does not fail the call.
 *
 * Return: 0 on success, -EINVAL if @ch is NULL, has no queues, asks for more
 * queues than are available, or the channel could not be set up.
 */
static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
{
	struct ice_pf *pf = vsi->back;
	struct ice_vsi *ch_vsi;
	struct device *dev;

	if (!ch)
		return -EINVAL;

	dev = ice_pf_to_dev(pf);

	if (!ch->num_txq || !ch->num_rxq) {
		dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
		return -EINVAL;
	}

	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
		dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
			vsi->cnt_q_avail, ch->num_txq);
		return -EINVAL;
	}

	if (!ice_setup_channel(pf, vsi, ch)) {
		dev_info(dev, "Failed to setup channel\n");
		return -EINVAL;
	}

	/* configure BW rate limit, if the channel asks for one */
	ch_vsi = ch->ch_vsi;
	if (ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
		if (ice_set_bw_limit(ch_vsi, ch->max_tx_rate, ch->min_tx_rate))
			dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
				ch->max_tx_rate, ch_vsi->vsi_num);
		else
			dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
				ch->max_tx_rate, ch_vsi->vsi_num);
	}

	/* the channel now owns these queues */
	vsi->cnt_q_avail -= ch->num_txq;

	return 0;
}
/**
 * ice_remove_q_channels - Remove queue channels for the TCs
 * @vsi: VSI to be configured
 * @rem_fltr: delete advanced switch filter or not (not used by this body)
 *
 * Tear down every channel on vsi->ch_list: unlink it, detach the main VSI's
 * rings and vectors from it, remove the channel VSI from the scheduler tree,
 * FW and the PF, and free the channel. Afterwards the TC-to-VSI map and the
 * all_enatc/all_numtc fields of @vsi are cleared.
 */
static void ice_remove_q_channels(struct ice_vsi *vsi, bool __maybe_unused rem_fltr)
{
	struct ice_channel *cur, *next;
	int i;

	/* perform cleanup for channels if they exist */
	list_for_each_entry_safe(cur, next, &vsi->ch_list, list) {
		struct ice_vsi *ch_vsi = cur->ch_vsi;
		int q;

		list_del(&cur->list);

		/* channel without a VSI: nothing but the memory to release */
		if (!ch_vsi) {
			kfree(cur);
			continue;
		}

		/* detach the main VSI's rings/vectors from this channel */
		for (q = 0; q < cur->num_rxq; q++) {
			struct ice_tx_ring *txr = vsi->tx_rings[cur->base_q + q];
			struct ice_rx_ring *rxr = vsi->rx_rings[cur->base_q + q];

			if (txr) {
				txr->ch = NULL;
				if (txr->q_vector)
					txr->q_vector->ch = NULL;
			}
			if (rxr) {
				rxr->ch = NULL;
				if (rxr->q_vector)
					rxr->q_vector->ch = NULL;
			}
		}

		/* clear the VSI from scheduler tree */
		ice_rm_vsi_lan_cfg(ch_vsi->port_info, ch_vsi->idx);

		/* Delete VSI from FW */
		ice_vsi_delete(ch_vsi);

		/* Delete VSI from PF and HW VSI arrays */
		ice_vsi_clear(ch_vsi);

		/* free the channel */
		kfree(cur);
	}

	/* clear the channel VSI map which is stored in main VSI */
	ice_for_each_chnl_tc(i)
		vsi->tc_map_vsi[i] = NULL;

	/* reset main VSI's all TC information */
	vsi->all_numtc = 0;
	vsi->all_enatc = 0;
}
/**
* ice_rebuild_channels - rebuild channel
* @pf: ptr to PF
*
* Recreate channel VSIs and replay filters. The main VSI is first
* reconfigured with the TC bitmap saved in old_ena_tc, then every VSI of type
* ICE_VSI_CHNL found in the PF is rebuilt and replayed, and finally each
* channel on the main VSI's ch_list gets its rings/vectors and BW rate limit
* reapplied.
*
* Return: 0 when there is nothing to do (no main VSI, MQPRIO flag not set,
* or old_numtc == 1) or on success; otherwise the error from
* ice_vsi_cfg_tc(), ice_vsi_rebuild() or ice_replay_vsi(). A failure to
* reapply a BW rate limit is only logged.
*/
static int ice_rebuild_channels(struct ice_pf *pf)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_vsi *main_vsi;
bool rem_adv_fltr = true;
struct ice_channel *ch;
struct ice_vsi *vsi;
/* channel VSIs are stored starting at index 1 of tc_map_vsi;
* presumably index 0 (TC0) belongs to the main VSI - TODO confirm
*/
int tc_idx = 1;
int i, err;
main_vsi = ice_get_main_vsi(pf);
if (!main_vsi)
return 0;
if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
main_vsi->old_numtc == 1)
return 0; /* nothing to be done */
/* reconfigure main VSI based on old value of TC and cached values
* for MQPRIO opts
*/
err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
if (err) {
dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
main_vsi->old_ena_tc, main_vsi->vsi_num);
return err;
}
/* rebuild ADQ VSIs */
ice_for_each_vsi(pf, i) {
enum ice_vsi_type type;
vsi = pf->vsi[i];
if (!vsi || vsi->type != ICE_VSI_CHNL)
continue;
type = vsi->type;
/* rebuild ADQ VSI */
err = ice_vsi_rebuild(vsi, true);
if (err) {
dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
ice_vsi_type_str(type), vsi->idx, err);
goto cleanup;
}
/* Re-map HW VSI number, using VSI handle that has been
* previously validated in ice_replay_vsi() call above
*/
/* NOTE(review): in this function ice_replay_vsi() is called
* after this statement, not before - the comment above may
* refer to an earlier replay elsewhere; confirm
*/
vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
/* replay filters for the VSI */
err = ice_replay_vsi(&pf->hw, vsi->idx);
if (err) {
dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
ice_vsi_type_str(type), err, vsi->idx);
/* replay failed, so ask cleanup not to remove advanced
* filters
*/
rem_adv_fltr = false;
goto cleanup;
}
dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
ice_vsi_type_str(type), vsi->idx);
/* store ADQ VSI at correct TC index in main VSI's
* map of TC to VSI
*/
main_vsi->tc_map_vsi[tc_idx++] = vsi;
}
/* ADQ VSI(s) has been rebuilt successfully, so setup
* channel for main VSI's Tx and Rx rings
*/
list_for_each_entry(ch, &main_vsi->ch_list, list) {
struct ice_vsi *ch_vsi;
ch_vsi = ch->ch_vsi;
if (!ch_vsi)
continue;
/* reconfig channel resources */
ice_cfg_chnl_all_res(main_vsi, ch);
/* replay BW rate limit if it is non-zero */
if (!ch->max_tx_rate && !ch->min_tx_rate)
continue;
/* a failure here is logged only; the rebuild still succeeds */
err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
ch->min_tx_rate);
if (err)
dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
err, ch->max_tx_rate, ch->min_tx_rate,
ch_vsi->vsi_num);
else
dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
ch->max_tx_rate, ch->min_tx_rate,
ch_vsi->vsi_num);
}
/* reconfig RSS for main VSI */
if (main_vsi->ch_rss_size)
ice_vsi_cfg_rss_lut_key(main_vsi);
return 0;
cleanup:
/* a channel VSI failed to rebuild or replay: drop all channels */
ice_remove_q_channels(main_vsi, rem_adv_fltr);
return err;
}
/**
 * ice_create_q_channels - Add queue channel for the given TCs
 * @vsi: VSI to be configured
 *
 * For every channel TC enabled in vsi->all_enatc, allocate a channel, fill
 * it from the cached mqprio options (queue count, base queue, min/max rate
 * converted to Kbps), create it and record it in the VSI's channel list and
 * TC-to-VSI map. On any failure all channels of @vsi are removed again.
 *
 * Return: 0 on success, -ENOMEM if a channel could not be allocated, or the
 * error returned by ice_create_q_channel().
 */
static int ice_create_q_channels(struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(vsi->back);
	int tc, err;

	ice_for_each_chnl_tc(tc) {
		struct ice_channel *new_ch;
		u64 max_bps, min_bps;

		if (!(vsi->all_enatc & BIT(tc)))
			continue;

		new_ch = kzalloc(sizeof(*new_ch), GFP_KERNEL);
		if (!new_ch) {
			err = -ENOMEM;
			goto err_free;
		}

		INIT_LIST_HEAD(&new_ch->list);
		new_ch->base_q = vsi->mqprio_qopt.qopt.offset[tc];
		new_ch->num_txq = vsi->mqprio_qopt.qopt.count[tc];
		new_ch->num_rxq = vsi->mqprio_qopt.qopt.count[tc];

		/* rates are cached in Bytes/s; channels keep them in Kbits/s */
		max_bps = vsi->mqprio_qopt.max_rate[tc];
		min_bps = vsi->mqprio_qopt.min_rate[tc];
		new_ch->max_tx_rate =
			max_bps ? div_u64(max_bps, ICE_BW_KBPS_DIVISOR) : 0;
		new_ch->min_tx_rate =
			min_bps ? div_u64(min_bps, ICE_BW_KBPS_DIVISOR) : 0;

		err = ice_create_q_channel(vsi, new_ch);
		if (err) {
			dev_err(dev, "failed creating channel TC:%d\n", tc);
			/* not on the list yet, so free it here */
			kfree(new_ch);
			goto err_free;
		}

		list_add_tail(&new_ch->list, &vsi->ch_list);
		vsi->tc_map_vsi[tc] = new_ch->ch_vsi;
		dev_dbg(dev, "successfully created channel: VSI %pK\n",
			new_ch->ch_vsi);
	}

	return 0;

err_free:
	/* undo the channels created so far */
	ice_remove_q_channels(vsi, false);

	return err;
}
/**
* ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
* @netdev: net device to configure
* @type_data: TC offload data
*
* Handle an mqprio qdisc offload request. With hw offload requested, only
* TC_MQPRIO_MODE_CHANNEL is accepted: the options are validated and cached,
* the main VSI is rebuilt with the queue count the TCs need, TC0's rate limit
* is applied to the main VSI and a channel is created for each remaining TC.
* Without hw offload, the MQPRIO flag is cleared, existing channels are
* removed and the VSI is rebuilt with a queue count derived from the
* available queues and the number of online CPUs.
*
* Return: 0 on success, -EINVAL for an unsupported mode, or the error from
* validation, VSI rebuild, TC configuration, rate limiting or channel
* creation.
*/
static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
{
struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
u16 mode, ena_tc_qdisc = 0;
int cur_txq, cur_rxq;
u8 hw = 0, num_tcf;
struct device *dev;
int ret, i;
dev = ice_pf_to_dev(pf);
num_tcf = mqprio_qopt->qopt.num_tc;
hw = mqprio_qopt->qopt.hw;
mode = mqprio_qopt->mode;
/* no hw offload requested: drop MQPRIO state, cache the options and go
* straight to reconfiguration with an empty TC bitmap
*/
if (!hw) {
clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
vsi->ch_rss_size = 0;
memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
goto config_tcf;
}
/* Generate queue region map for number of TCF requested */
for (i = 0; i < num_tcf; i++)
ena_tc_qdisc |= BIT(i);
switch (mode) {
case TC_MQPRIO_MODE_CHANNEL:
ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
if (ret) {
netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
ret);
return ret;
}
/* options are valid: cache them and mark MQPRIO as active */
memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
break;
default:
return -EINVAL;
}
config_tcf:
/* Requesting same TCF configuration as already enabled */
if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
mode != TC_MQPRIO_MODE_CHANNEL)
return 0;
/* Pause VSI queues */
ice_dis_vsi(vsi, true);
/* offload is being turned off: remove any existing channels */
if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
ice_remove_q_channels(vsi, true);
if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
/* request the smaller of the available queue count and the number
* of online CPUs
*/
vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
num_online_cpus());
vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
num_online_cpus());
} else {
/* logic to rebuild VSI, same like ethtool -L */
u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
/* after the loop these hold the values of the last enabled TC, so
* the requested queue count is the end of that TC's queue range
*/
for (i = 0; i < num_tcf; i++) {
if (!(ena_tc_qdisc & BIT(i)))
continue;
offset = vsi->mqprio_qopt.qopt.offset[i];
qcount_rx = vsi->mqprio_qopt.qopt.count[i];
qcount_tx = vsi->mqprio_qopt.qopt.count[i];
}
vsi->req_txq = offset + qcount_tx;
vsi->req_rxq = offset + qcount_rx;
/* store away original rss_size info, so that it gets reused
* from ice_vsi_rebuild during tc-qdisc delete stage - to
* determine, what should be the rss_size for main VSI
*/
vsi->orig_rss_size = vsi->rss_size;
}
/* save current values of Tx and Rx queues before calling VSI rebuild
* for fallback option
*/
cur_txq = vsi->num_txq;
cur_rxq = vsi->num_rxq;
/* proceed with rebuild main VSI using correct number of queues */
ret = ice_vsi_rebuild(vsi, false);
if (ret) {
/* fallback to current number of queues */
dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
vsi->req_txq = cur_txq;
vsi->req_rxq = cur_rxq;
clear_bit(ICE_RESET_FAILED, pf->state);
if (ice_vsi_rebuild(vsi, false)) {
dev_err(dev, "Rebuild of main VSI failed again\n");
/* NOTE(review): returns the first rebuild's error and skips
* the ice_ena_vsi() at the exit label, so the VSI stays
* paused - confirm this is intended
*/
return ret;
}
}
vsi->all_numtc = num_tcf;
vsi->all_enatc = ena_tc_qdisc;
ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
if (ret) {
netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
vsi->vsi_num);
goto exit;
}
if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
/* cached rates are in Bytes/s as received from the TC layer */
u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
/* set TC0 rate limit if specified */
if (max_tx_rate || min_tx_rate) {
/* convert to Kbits/s */
if (max_tx_rate)
max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
if (min_tx_rate)
min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
if (!ret) {
dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
max_tx_rate, min_tx_rate, vsi->vsi_num);
} else {
dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
max_tx_rate, min_tx_rate, vsi->vsi_num);
goto exit;
}
}
/* create a channel for each enabled channel TC */
ret = ice_create_q_channels(vsi);
if (ret) {
netdev_err(netdev, "failed configuring queue channels\n");
goto exit;
} else {
netdev_dbg(netdev, "successfully configured channels\n");
}
}
/* reprogram the RSS LUT/key when a channel RSS size is in effect */
if (vsi->ch_rss_size)
ice_vsi_cfg_rss_lut_key(vsi);
exit:
/* if error, reset the all_numtc and all_enatc */
if (ret) {
vsi->all_numtc = 0;
vsi->all_enatc = 0;
}
/* resume VSI */
ice_ena_vsi(vsi, true);
return ret;
}
static LIST_HEAD(ice_block_cb_list);
static int
......@@ -7190,6 +8053,8 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
int err;
switch (type) {
case TC_SETUP_BLOCK:
......@@ -7197,6 +8062,12 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
&ice_block_cb_list,
ice_setup_tc_block_cb,
np, np, true);
case TC_SETUP_QDISC_MQPRIO:
/* setup traffic classifier for receive side */
mutex_lock(&pf->tc_mutex);
err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
mutex_unlock(&pf->tc_mutex);
return err;
default:
return -EOPNOTSUPP;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册