提交 67f8b1dc 编写于 作者: T Tariq Toukan 提交者: David S. Miller

net/mlx4_en: Refactor the XDP forwarding rings scheme

Separately manage the two types of TX rings: regular ones, and XDP.
Upon an XDP set, do not borrow regular TX rings and convert them
into XDP ones, but allocate new ones, unless we hit the max number
of rings.
Which means that in systems with smaller #cores we will not consume
the current TX rings for XDP, while we are still in the num TX limit.

XDP TX rings counters are not shown in ethtool statistics.
Instead, XDP counters will be added to the respective RX rings
in a downstream patch.

This has no performance implications.
Signed-off-by: NTariq Toukan <tariqt@mellanox.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 ccc109b8
......@@ -127,15 +127,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
/* For TX we use the same irq per
ring we assigned for the RX */
struct mlx4_en_cq *rx_cq;
int xdp_index;
/* The xdp tx irq must align with the rx ring that forwards to
* it, so reindex these from 0. This should only happen when
* tx_ring_num is not a multiple of rx_ring_num.
*/
xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx;
if (xdp_index >= 0)
cq_idx = xdp_index;
cq_idx = cq_idx % priv->rx_ring_num;
rx_cq = priv->rx_cq[cq_idx];
cq->vector = rx_cq->vector;
......
......@@ -49,16 +49,19 @@
static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
{
int i;
int i, t;
int err = 0;
for (i = 0; i < priv->tx_ring_num; i++) {
priv->tx_cq[i]->moder_cnt = priv->tx_frames;
priv->tx_cq[i]->moder_time = priv->tx_usecs;
if (priv->port_up) {
err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]);
if (err)
return err;
for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
for (i = 0; i < priv->tx_ring_num[t]; i++) {
priv->tx_cq[t][i]->moder_cnt = priv->tx_frames;
priv->tx_cq[t][i]->moder_time = priv->tx_usecs;
if (priv->port_up) {
err = mlx4_en_set_cq_moder(priv,
priv->tx_cq[t][i]);
if (err)
return err;
}
}
}
......@@ -336,7 +339,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
switch (sset) {
case ETH_SS_STATS:
return bitmap_iterator_count(&it) +
(priv->tx_ring_num * 2) +
(priv->tx_ring_num[TX] * 2) +
(priv->rx_ring_num * 3);
case ETH_SS_TEST:
return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
......@@ -397,9 +400,9 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->pkstats)[i];
for (i = 0; i < priv->tx_ring_num; i++) {
data[index++] = priv->tx_ring[i]->packets;
data[index++] = priv->tx_ring[i]->bytes;
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
}
for (i = 0; i < priv->rx_ring_num; i++) {
data[index++] = priv->rx_ring[i]->packets;
......@@ -467,7 +470,7 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
for (i = 0; i < priv->tx_ring_num; i++) {
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
......@@ -1060,7 +1063,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
priv->rx_ring[0]->size) &&
tx_size == priv->tx_ring[0]->size)
tx_size == priv->tx_ring[TX][0]->size)
return 0;
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
......@@ -1105,7 +1108,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev,
param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
param->rx_pending = priv->port_up ?
priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
param->tx_pending = priv->tx_ring[0]->size;
param->tx_pending = priv->tx_ring[TX][0]->size;
}
static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
......@@ -1710,7 +1713,7 @@ static void mlx4_en_get_channels(struct net_device *dev,
channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
channel->rx_count = priv->rx_ring_num;
channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP;
channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
}
static int mlx4_en_set_channels(struct net_device *dev,
......@@ -1721,6 +1724,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
struct mlx4_en_port_profile new_prof;
struct mlx4_en_priv *tmp;
int port_up = 0;
int xdp_count;
int err = 0;
if (channel->other_count || channel->combined_count ||
......@@ -1729,20 +1733,25 @@ static int mlx4_en_set_channels(struct net_device *dev,
!channel->tx_count || !channel->rx_count)
return -EINVAL;
if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
en_err(priv, "Minimum %d tx channels required with XDP on\n",
priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
return -EINVAL;
}
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
MAX_TX_RINGS);
goto out;
}
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
new_prof.num_tx_rings_p_up = channel->tx_count;
new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
new_prof.tx_ring_num[TX_XDP] = xdp_count;
new_prof.rx_ring_num = channel->rx_count;
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
......@@ -1756,14 +1765,13 @@ static int mlx4_en_set_channels(struct net_device *dev,
mlx4_en_safe_replace_resources(priv, tmp);
netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
priv->xdp_ring_num);
netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
if (dev->num_tc)
mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num);
en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
if (port_up) {
......@@ -1774,8 +1782,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
err = mlx4_en_moderation_update(priv);
out:
kfree(tmp);
mutex_unlock(&mdev->state_lock);
kfree(tmp);
return err;
}
......@@ -1823,11 +1831,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
int ret = 0;
if (bf_enabled_new != bf_enabled_old) {
int t;
if (bf_enabled_new) {
bool bf_supported = true;
for (i = 0; i < priv->tx_ring_num; i++)
bf_supported &= priv->tx_ring[i]->bf_alloced;
for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
for (i = 0; i < priv->tx_ring_num[t]; i++)
bf_supported &=
priv->tx_ring[t][i]->bf_alloced;
if (!bf_supported) {
en_err(priv, "BlueFlame is not supported\n");
......@@ -1839,8 +1851,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
}
for (i = 0; i < priv->tx_ring_num; i++)
priv->tx_ring[i]->bf_enabled = bf_enabled_new;
for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
for (i = 0; i < priv->tx_ring_num[t]; i++)
priv->tx_ring[t][i]->bf_enabled =
bf_enabled_new;
en_info(priv, "BlueFlame %s\n",
bf_enabled_new ? "Enabled" : "Disabled");
......
......@@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].tx_ppp = pfctx;
params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
params->prof[i].tx_ring_num = params->num_tx_rings_p_up *
params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
MLX4_EN_NUM_UP;
params->prof[i].rss_rings = 0;
params->prof[i].inline_thold = inline_thold;
......
......@@ -196,8 +196,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.tso_packets = 0;
priv->port_stats.xmit_more = 0;
for (i = 0; i < priv->tx_ring_num; i++) {
const struct mlx4_en_tx_ring *ring = priv->tx_ring[i];
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
stats->tx_packets += ring->packets;
stats->tx_bytes += ring->bytes;
......
......@@ -788,7 +788,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct bpf_prog *xdp_prog;
int doorbell_pending;
struct sk_buff *skb;
int tx_index;
int index;
int nr;
unsigned int length;
......@@ -808,7 +807,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog);
doorbell_pending = 0;
tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of
......@@ -905,7 +903,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
break;
case XDP_TX:
if (likely(!mlx4_en_xmit_frame(frags, dev,
length, tx_index,
length, cq->ring,
&doorbell_pending)))
goto consumed;
goto xdp_drop; /* Drop on xmit failure */
......@@ -1082,7 +1080,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
out:
rcu_read_unlock();
if (doorbell_pending)
mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
mlx4_cq_set_ci(&cq->mcq);
......@@ -1162,7 +1160,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
/* bpf requires buffers to be set up as 1 packet per page.
* This only works when num_frags == 1.
*/
if (priv->xdp_ring_num) {
if (priv->tx_ring_num[TX_XDP]) {
dma_dir = PCI_DMA_BIDIRECTIONAL;
/* This will gain efficient xdp frame recycling at the expense
* of more costly truesize accounting
......
......@@ -392,7 +392,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
cnt++;
}
netdev_tx_reset_queue(ring->tx_queue);
if (ring->tx_queue)
netdev_tx_reset_queue(ring->tx_queue);
if (cnt)
en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
......@@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq;
struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
struct mlx4_cqe *cqe;
u16 index;
u16 new_index, ring_index, stamp_index;
......@@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
bool bf_ok;
tx_ind = skb_get_queue_mapping(skb);
ring = priv->tx_ring[tx_ind];
ring = priv->tx_ring[TX][tx_ind];
if (!priv->port_up)
goto tx_drop;
......@@ -1101,7 +1102,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
"mlx4_en_xmit_frame requires minimum size tx desc");
ring = priv->tx_ring[tx_ind];
ring = priv->tx_ring[TX_XDP][tx_ind];
if (!priv->port_up)
goto tx_drop;
......
......@@ -207,8 +207,10 @@ enum {
*/
enum cq_type {
/* keep tx types first */
TX,
TX_XDP,
#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1)
RX,
};
......@@ -373,7 +375,7 @@ struct mlx4_en_cq {
struct mlx4_en_port_profile {
u32 flags;
u32 tx_ring_num;
u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
u32 rx_ring_num;
u32 tx_ring_size;
u32 rx_ring_size;
......@@ -570,17 +572,16 @@ struct mlx4_en_priv {
u32 flags;
u8 num_tx_rings_p_up;
u32 tx_work_limit;
u32 tx_ring_num;
u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
u32 rx_ring_num;
u32 rx_skb_size;
struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
u16 num_frags;
u16 log_rx_info;
int xdp_ring_num;
struct mlx4_en_tx_ring **tx_ring;
struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
struct mlx4_en_cq **tx_cq;
struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES];
struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
struct mlx4_qp drop_qp;
struct work_struct rx_mode_task;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册