提交 820dd7a2 编写于 作者: D David S. Miller

Merge branch 'enetc-xdp-fixes'

Vladimir Oltean says:

====================
Fixups for XDP on NXP ENETC

After some more XDP testing on the NXP LS1028A, this is a set of 10 bug
fixes, simplifications and tweaks, ranging from addressing Toke's feedback
(the network stack can run concurrently with XDP on the same TX rings)
to fixing some OOM conditions seen under TX congestion.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -9,6 +9,26 @@
#include <linux/ptp_classify.h>
#include <net/pkt_sched.h>
static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv)
{
int num_tx_rings = priv->num_tx_rings;
int i;
for (i = 0; i < priv->num_rx_rings; i++)
if (priv->rx_ring[i]->xdp.prog)
return num_tx_rings - num_possible_cpus();
return num_tx_rings;
}
static struct enetc_bdr *enetc_rx_ring_from_xdp_tx_ring(struct enetc_ndev_priv *priv,
struct enetc_bdr *tx_ring)
{
int index = &priv->tx_ring[tx_ring->index] - priv->xdp_tx_ring;
return priv->rx_ring[index];
}
static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
{
if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect)
......@@ -468,7 +488,6 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *tx_swbd)
{
struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index];
struct enetc_rx_swbd rx_swbd = {
.dma = tx_swbd->dma,
.page = tx_swbd->page,
......@@ -476,6 +495,9 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
.dir = tx_swbd->dir,
.len = tx_swbd->len,
};
struct enetc_bdr *rx_ring;
rx_ring = enetc_rx_ring_from_xdp_tx_ring(priv, tx_ring);
if (likely(enetc_swbd_unused(rx_ring))) {
enetc_reuse_page(rx_ring, &rx_swbd);
......@@ -544,7 +566,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
if (xdp_frame) {
xdp_return_frame(xdp_frame);
tx_swbd->xdp_frame = NULL;
} else if (skb) {
if (unlikely(tx_swbd->skb->cb[0] &
ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) {
......@@ -558,7 +579,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
do_twostep_tstamp = false;
}
napi_consume_skb(skb, napi_budget);
tx_swbd->skb = NULL;
}
tx_byte_cnt += tx_swbd->len;
......@@ -753,27 +773,35 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
return rx_swbd;
}
/* Reuse the current page without performing half-page buffer flipping */
static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
if (likely(enetc_page_reusable(rx_swbd->page))) {
size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;
size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;
enetc_reuse_page(rx_ring, rx_swbd);
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
buffer_size, rx_swbd->dir);
rx_swbd->page = NULL;
}
/* Reuse the current page by performing half-page buffer flipping */
static void enetc_flip_rx_buff(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
if (likely(enetc_page_reusable(rx_swbd->page))) {
rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE;
page_ref_inc(rx_swbd->page);
enetc_reuse_page(rx_ring, rx_swbd);
/* sync for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
buffer_size, rx_swbd->dir);
enetc_put_rx_buff(rx_ring, rx_swbd);
} else {
dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
rx_swbd->dir);
rx_swbd->page = NULL;
}
rx_swbd->page = NULL;
}
static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
......@@ -793,7 +821,7 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
skb_reserve(skb, rx_ring->buffer_offset);
__skb_put(skb, size);
enetc_put_rx_buff(rx_ring, rx_swbd);
enetc_flip_rx_buff(rx_ring, rx_swbd);
return skb;
}
......@@ -806,7 +834,7 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page,
rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE);
enetc_put_rx_buff(rx_ring, rx_swbd);
enetc_flip_rx_buff(rx_ring, rx_swbd);
}
static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
......@@ -816,12 +844,14 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
return false;
enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
enetc_rxbd_next(rx_ring, rxbd, i);
while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
dma_rmb();
bd_status = le32_to_cpu((*rxbd)->r.lstatus);
enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
enetc_rxbd_next(rx_ring, rxbd, i);
}
......@@ -1051,7 +1081,9 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
int xdp_tx_bd_cnt, i, k;
int xdp_tx_frm_cnt = 0;
tx_ring = priv->tx_ring[smp_processor_id()];
enetc_lock_mdio();
tx_ring = priv->xdp_tx_ring[smp_processor_id()];
prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
......@@ -1079,6 +1111,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
tx_ring->stats.xdp_tx += xdp_tx_frm_cnt;
enetc_unlock_mdio();
return xdp_tx_frm_cnt;
}
......@@ -1144,24 +1178,8 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
}
}
/* Reuse the current page without performing half-page buffer flipping */
static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
enetc_reuse_page(rx_ring, rx_swbd);
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
ENETC_RXB_DMA_SIZE_XDP,
rx_swbd->dir);
rx_swbd->page = NULL;
}
/* Convert RX buffer descriptors to TX buffer descriptors. These will be
* recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
* RX software BDs because the ownership of the buffer no longer belongs to the
* RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page.
* recycled back into the RX ring in enetc_clean_tx_ring.
*/
static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
struct enetc_bdr *rx_ring,
......@@ -1183,7 +1201,6 @@ static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
tx_swbd->is_dma_page = true;
tx_swbd->is_xdp_tx = true;
tx_swbd->is_eof = false;
memset(rx_swbd, 0, sizeof(*rx_swbd));
}
/* We rely on caller providing an rx_ring_last > rx_ring_first */
......@@ -1196,8 +1213,8 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
int rx_ring_last)
{
while (rx_ring_first != rx_ring_last) {
enetc_put_xdp_buff(rx_ring,
&rx_ring->rx_swbd[rx_ring_first]);
enetc_put_rx_buff(rx_ring,
&rx_ring->rx_swbd[rx_ring_first]);
enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
}
rx_ring->stats.xdp_drops++;
......@@ -1227,8 +1244,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0;
struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
int rx_frm_cnt = 0, rx_byte_cnt = 0;
struct enetc_bdr *tx_ring;
int cleaned_cnt, i;
u32 xdp_act;
......@@ -1266,6 +1283,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
switch (xdp_act) {
default:
bpf_warn_invalid_xdp_action(xdp_act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
fallthrough;
......@@ -1281,12 +1301,12 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
&i, &cleaned_cnt,
ENETC_RXB_DMA_SIZE_XDP);
if (unlikely(!skb))
/* Exit the switch/case, not the loop */
break;
goto out;
napi_gro_receive(napi, skb);
break;
case XDP_TX:
tx_ring = priv->xdp_tx_ring[rx_ring->index];
xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr,
rx_ring,
orig_i, i);
......@@ -1298,6 +1318,17 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
tx_ring->stats.xdp_tx += xdp_tx_bd_cnt;
rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
xdp_tx_frm_cnt++;
/* The XDP_TX enqueue was successful, so we
* need to scrub the RX software BDs because
* the ownership of the buffers no longer
* belongs to the RX ring, and we must prevent
* enetc_refill_rx_ring() from reusing
* rx_swbd->page.
*/
while (orig_i != i) {
rx_ring->rx_swbd[orig_i].page = NULL;
enetc_bdr_idx_inc(rx_ring, &orig_i);
}
}
break;
case XDP_REDIRECT:
......@@ -1318,8 +1349,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
tmp_orig_i = orig_i;
while (orig_i != i) {
enetc_put_rx_buff(rx_ring,
&rx_ring->rx_swbd[orig_i]);
enetc_flip_rx_buff(rx_ring,
&rx_ring->rx_swbd[orig_i]);
enetc_bdr_idx_inc(rx_ring, &orig_i);
}
......@@ -1330,20 +1361,12 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
xdp_redirect_frm_cnt++;
rx_ring->stats.xdp_redirect++;
}
if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) {
xdp_do_flush_map();
xdp_redirect_frm_cnt = 0;
}
break;
default:
bpf_warn_invalid_xdp_action(xdp_act);
}
rx_frm_cnt++;
}
out:
rx_ring->next_to_clean = i;
rx_ring->stats.packets += rx_frm_cnt;
......@@ -2033,6 +2056,7 @@ void enetc_start(struct net_device *ndev)
int enetc_open(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
int num_stack_tx_queues;
int err;
err = enetc_setup_irqs(priv);
......@@ -2051,7 +2075,9 @@ int enetc_open(struct net_device *ndev)
if (err)
goto err_alloc_rx;
err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
if (err)
goto err_set_queues;
......@@ -2124,15 +2150,17 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
struct enetc_ndev_priv *priv = netdev_priv(ndev);
struct tc_mqprio_qopt *mqprio = type_data;
struct enetc_bdr *tx_ring;
int num_stack_tx_queues;
u8 num_tc;
int i;
num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
num_tc = mqprio->num_tc;
if (!num_tc) {
netdev_reset_tc(ndev);
netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
/* Reset all ring priorities to 0 */
for (i = 0; i < priv->num_tx_rings; i++) {
......@@ -2144,7 +2172,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
}
/* Check if we have enough BD rings available to accommodate all TCs */
if (num_tc > priv->num_tx_rings) {
if (num_tc > num_stack_tx_queues) {
netdev_err(ndev, "Max %d traffic classes supported\n",
priv->num_tx_rings);
return -EINVAL;
......@@ -2432,8 +2460,9 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
int enetc_alloc_msix(struct enetc_ndev_priv *priv)
{
struct pci_dev *pdev = priv->si->pdev;
int v_tx_rings;
int first_xdp_tx_ring;
int i, n, err, nvec;
int v_tx_rings;
nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num;
/* allocate MSIX for both messaging and Rx/Tx interrupts */
......@@ -2508,6 +2537,9 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
}
}
first_xdp_tx_ring = priv->num_tx_rings - num_possible_cpus();
priv->xdp_tx_ring = &priv->tx_ring[first_xdp_tx_ring];
return 0;
fail:
......
......@@ -79,7 +79,7 @@ struct enetc_xdp_data {
};
#define ENETC_RX_RING_DEFAULT_SIZE 2048
#define ENETC_TX_RING_DEFAULT_SIZE 256
#define ENETC_TX_RING_DEFAULT_SIZE 2048
#define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2)
struct enetc_bdr {
......@@ -317,6 +317,7 @@ struct enetc_ndev_priv {
u32 speed; /* store speed for compare update pspeed */
struct enetc_bdr **xdp_tx_ring;
struct enetc_bdr *tx_ring[16];
struct enetc_bdr *rx_ring[16];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册