提交 919ce2a4 编写于 作者: D David S. Miller

Merge branch 'bnxt_en-Add-XDP-support'

Michael Chan says:

====================
bnxt_en: Add XDP support.

The first 10 patches refactor the code (rx/tx code paths and ring logic)
and add the basic infrastructure to support XDP.  The 11th patch adds
basic ndo_xdp to support XDP_DROP and XDP_PASS only.  The 12th patch
completes the series with XDP_TX.

Thanks to Andy Gospodarek for testing and uncovering some bugs.

v3: Removed Kconfig option.
    Pass modified offset and length to stack for XDP_PASS.
    Improved buffer recycling scheme for XDP_TX.
    Other minor fixes.

v2: Addressed review comments from Alexei Starovoitov, Jakub Kicinski,
and David Miller:
	- Added missing dma syncs.
	- Added XDP headroom support.
	- Added tracing in exception path.
	- Clarified a parameter change.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
obj-$(CONFIG_BNXT) += bnxt_en.o
bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o
bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o
......@@ -416,6 +416,11 @@ struct rx_tpa_end_cmp_ext {
#define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
#define BNXT_MAX_MTU 9500
#define BNXT_MAX_PAGE_MODE_MTU \
((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \
XDP_PACKET_HEADROOM)
#define BNXT_MIN_PKT_SIZE 52
#define BNXT_NUM_TESTS(bp) 0
......@@ -507,17 +512,25 @@ struct rx_tpa_end_cmp_ext {
#define BNXT_HWRM_REQS_PER_PAGE (BNXT_PAGE_SIZE / \
BNXT_HWRM_REQ_MAX_SIZE)
#define BNXT_RX_EVENT 1
#define BNXT_AGG_EVENT 2
#define BNXT_TX_EVENT 4
struct bnxt_sw_tx_bd {
struct sk_buff *skb;
DEFINE_DMA_UNMAP_ADDR(mapping);
u8 is_gso;
u8 is_push;
unsigned short nr_frags;
union {
unsigned short nr_frags;
u16 rx_prod;
};
};
struct bnxt_sw_rx_bd {
u8 *data;
DEFINE_DMA_UNMAP_ADDR(mapping);
void *data;
u8 *data_ptr;
dma_addr_t mapping;
};
struct bnxt_sw_rx_agg_bd {
......@@ -558,6 +571,7 @@ struct bnxt_tx_ring_info {
struct bnxt_napi *bnapi;
u16 tx_prod;
u16 tx_cons;
u16 txq_index;
void __iomem *tx_doorbell;
struct tx_bd *tx_desc_ring[MAX_TX_PAGES];
......@@ -576,7 +590,8 @@ struct bnxt_tx_ring_info {
};
struct bnxt_tpa_info {
u8 *data;
void *data;
u8 *data_ptr;
dma_addr_t mapping;
u16 len;
unsigned short gso_type;
......@@ -608,6 +623,8 @@ struct bnxt_rx_ring_info {
void __iomem *rx_doorbell;
void __iomem *rx_agg_doorbell;
struct bpf_prog *xdp_prog;
struct rx_bd *rx_desc_ring[MAX_RX_PAGES];
struct bnxt_sw_rx_bd *rx_buf_ring;
......@@ -654,6 +671,11 @@ struct bnxt_napi {
struct bnxt_rx_ring_info *rx_ring;
struct bnxt_tx_ring_info *tx_ring;
void (*tx_int)(struct bnxt *, struct bnxt_napi *,
int);
u32 flags;
#define BNXT_NAPI_FLAG_XDP 0x1
bool in_reset;
};
......@@ -965,6 +987,7 @@ struct bnxt {
#define BNXT_FLAG_ROCE_CAP (BNXT_FLAG_ROCEV1_CAP | \
BNXT_FLAG_ROCEV2_CAP)
#define BNXT_FLAG_NO_AGG_RINGS 0x20000
#define BNXT_FLAG_RX_PAGE_MODE 0x40000
#define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
......@@ -976,6 +999,7 @@ struct bnxt {
#define BNXT_NPAR(bp) ((bp)->port_partition_type)
#define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp))
#define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
#define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
struct bnxt_en_dev *edev;
struct bnxt_en_dev * (*ulp_probe)(struct net_device *);
......@@ -984,12 +1008,21 @@ struct bnxt {
struct bnxt_rx_ring_info *rx_ring;
struct bnxt_tx_ring_info *tx_ring;
u16 *tx_ring_map;
struct sk_buff * (*gro_func)(struct bnxt_tpa_info *, int, int,
struct sk_buff *);
struct sk_buff * (*rx_skb_func)(struct bnxt *,
struct bnxt_rx_ring_info *,
u16, void *, u8 *, dma_addr_t,
unsigned int);
u32 rx_buf_size;
u32 rx_buf_use_size; /* useable size */
u16 rx_offset;
u16 rx_dma_offset;
enum dma_data_direction rx_dir;
u32 rx_ring_size;
u32 rx_agg_ring_size;
u32 rx_copy_thresh;
......@@ -1005,6 +1038,7 @@ struct bnxt {
int tx_nr_pages;
int tx_nr_rings;
int tx_nr_rings_per_tc;
int tx_nr_rings_xdp;
int tx_wake_thresh;
int tx_push_thresh;
......@@ -1140,6 +1174,8 @@ struct bnxt {
u8 num_leds;
struct bnxt_led_info leds[BNXT_MAX_LED];
struct bpf_prog *xdp_prog;
};
#define BNXT_RX_STATS_OFFSET(counter) \
......@@ -1159,7 +1195,23 @@ struct bnxt {
#define SFF_MODULE_ID_QSFP28 0x11
#define BNXT_MAX_PHY_I2C_RESP_SIZE 64
/* Return the number of free TX BDs on @txr: the ring size minus the
 * masked distance between the producer and consumer indices.
 */
static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
{
	u32 in_flight;

	/* Compiler barrier so the tx indices are re-read from memory. */
	barrier();

	in_flight = (txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask;

	return bp->tx_ring_size - in_flight;
}
extern const u16 bnxt_lhint_arr[];
int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
u16 prod, gfp_t gfp);
void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_ring_params(struct bnxt *);
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16);
int _hwrm_send_message(struct bnxt *, void *, u32, int);
int hwrm_send_message(struct bnxt *, void *, u32, int);
......@@ -1168,7 +1220,6 @@ int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap,
int bmap_size);
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings);
int bnxt_hwrm_set_coal(struct bnxt *);
unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
......@@ -1182,6 +1233,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
int bnxt_hwrm_fw_set_time(struct bnxt *);
int bnxt_open_nic(struct bnxt *, bool, bool);
int bnxt_close_nic(struct bnxt *, bool, bool);
int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp);
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
void bnxt_restore_pf_fw_resources(struct bnxt *bp);
......
......@@ -387,10 +387,10 @@ static int bnxt_set_channels(struct net_device *dev,
struct ethtool_channels *channel)
{
struct bnxt *bp = netdev_priv(dev);
int max_rx_rings, max_tx_rings, tcs;
int req_tx_rings, rsv_tx_rings;
u32 rc = 0;
int req_tx_rings, req_rx_rings, tcs;
bool sh = false;
int tx_xdp = 0;
int rc = 0;
if (channel->other_count)
return -EINVAL;
......@@ -410,32 +410,21 @@ static int bnxt_set_channels(struct net_device *dev,
if (channel->combined_count)
sh = true;
bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
tcs = netdev_get_num_tc(dev);
if (tcs > 1)
max_tx_rings /= tcs;
if (sh &&
channel->combined_count > max_t(int, max_rx_rings, max_tx_rings))
return -ENOMEM;
if (!sh && (channel->rx_count > max_rx_rings ||
channel->tx_count > max_tx_rings))
return -ENOMEM;
req_tx_rings = sh ? channel->combined_count : channel->tx_count;
req_tx_rings = min_t(int, req_tx_rings, max_tx_rings);
if (tcs > 1)
req_tx_rings *= tcs;
rsv_tx_rings = req_tx_rings;
if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings))
return -ENOMEM;
if (rsv_tx_rings < req_tx_rings) {
netdev_warn(dev, "Unable to allocate the requested tx rings\n");
return -ENOMEM;
req_rx_rings = sh ? channel->combined_count : channel->rx_count;
if (bp->tx_nr_rings_xdp) {
if (!sh) {
netdev_err(dev, "Only combined mode supported when XDP is enabled.\n");
return -EINVAL;
}
tx_xdp = req_rx_rings;
}
rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, tcs, tx_xdp);
if (rc) {
netdev_warn(dev, "Unable to allocate the requested rings\n");
return rc;
}
if (netif_running(dev)) {
......@@ -454,19 +443,17 @@ static int bnxt_set_channels(struct net_device *dev,
if (sh) {
bp->flags |= BNXT_FLAG_SHARED_RINGS;
bp->rx_nr_rings = min_t(int, channel->combined_count,
max_rx_rings);
bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count,
max_tx_rings);
bp->rx_nr_rings = channel->combined_count;
bp->tx_nr_rings_per_tc = channel->combined_count;
} else {
bp->flags &= ~BNXT_FLAG_SHARED_RINGS;
bp->rx_nr_rings = channel->rx_count;
bp->tx_nr_rings_per_tc = channel->tx_count;
}
bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc + tx_xdp;
if (tcs > 1)
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp;
bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
bp->tx_nr_rings + bp->rx_nr_rings;
......
/* Broadcom NetXtreme-C/E network driver.
*
* Copyright (c) 2016-2017 Broadcom Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
#include "bnxt_xdp.h"
/* Queue one XDP_TX frame on @txr.
 *
 * The frame occupies two consecutive BDs: a long TX BD that carries the
 * DMA address and length, followed by an extended BD that is zeroed.
 * @rx_prod is stored in the software TX buffer entry of the first BD so
 * the completion path can read it back.  Only the software producer
 * index is advanced here.
 */
static void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
			  dma_addr_t mapping, u32 len, u16 rx_prod)
{
	u16 idx = txr->tx_prod;
	struct bnxt_sw_tx_bd *sw_bd = &txr->tx_buf_ring[idx];
	struct tx_bd_ext *ext_bd;
	struct tx_bd *bd;
	u32 len_flags_type;

	/* Remember the rx producer value associated with this frame. */
	sw_bd->rx_prod = rx_prod;

	/* First BD: length, type, BD count of 2 and the length hint. */
	bd = &txr->tx_desc_ring[TX_RING(idx)][TX_IDX(idx)];

	len_flags_type = len << TX_BD_LEN_SHIFT;
	len_flags_type |= TX_BD_TYPE_LONG_TX_BD;
	len_flags_type |= 2 << TX_BD_FLAGS_BD_CNT_SHIFT;
	len_flags_type |= TX_BD_FLAGS_COAL_NOW;
	len_flags_type |= TX_BD_FLAGS_PACKET_END;
	len_flags_type |= bnxt_lhint_arr[len >> 9];

	bd->tx_bd_len_flags_type = cpu_to_le32(len_flags_type);
	bd->tx_bd_opaque = idx;
	bd->tx_bd_haddr = cpu_to_le64(mapping);

	/* Second BD: extended descriptor, all fields cleared. */
	idx = NEXT_TX(idx);
	ext_bd = (struct tx_bd_ext *)
		&txr->tx_desc_ring[TX_RING(idx)][TX_IDX(idx)];

	ext_bd->tx_bd_hsize_lflags = cpu_to_le32(0);
	ext_bd->tx_bd_mss = cpu_to_le32(0);
	ext_bd->tx_bd_cfa_action = cpu_to_le32(0);
	ext_bd->tx_bd_cfa_meta = cpu_to_le32(0);

	/* Publish the new software producer index past both BDs. */
	idx = NEXT_TX(idx);
	txr->tx_prod = idx;
}
/* TX completion handling for an XDP TX ring.
 *
 * Advances the TX consumer index by two BDs for each of the @nr_pkts
 * completed packets, then rings the RX doorbell.  If the TX ring still
 * has BDs outstanding, the doorbell value is the rx_prod recorded in the
 * software entry of the last completed packet; otherwise it is the RX
 * ring's current producer index.
 */
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
{
	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
	u16 cons = txr->tx_cons;
	u16 last_cons = cons;
	u16 rx_prod;
	int remaining;

	for (remaining = nr_pkts; remaining > 0; remaining--) {
		last_cons = cons;
		/* Each XDP packet uses two BDs. */
		cons = NEXT_TX(cons);
		cons = NEXT_TX(cons);
	}
	txr->tx_cons = cons;

	if (bnxt_tx_avail(bp, txr) != bp->tx_ring_size)
		rx_prod = txr->tx_buf_ring[last_cons].rx_prod;
	else
		rx_prod = rxr->rx_prod;

	writel(DB_KEY_RX | rx_prod, rxr->rx_doorbell);
}
/* Run the XDP program attached to @rxr (if any) on one received packet.
*
* @cons indexes the rx buffer ring entry of the packet, @page is the
* buffer passed to bnxt_reuse_rx_data() when the packet is not handed to
* the stack.  *@data_ptr and *@len describe the packet data and are
* rewritten if the program moved xdp.data.  *@event carries the caller's
* BNXT_*_EVENT bits and may be modified (see below).
*
* Returns:
* true - packet was consumed here (XDP_TX, XDP_DROP, XDP_ABORTED or an
* unknown action) and its rx buffer was handed back through
* bnxt_reuse_rx_data().
* false - no program is attached, or the program returned XDP_PASS; the
* packet should be passed to the stack.
*/
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
struct page *page, u8 **data_ptr, unsigned int *len, u8 *event)
{
struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
struct bnxt_tx_ring_info *txr;
struct bnxt_sw_rx_bd *rx_buf;
struct pci_dev *pdev;
struct xdp_buff xdp;
dma_addr_t mapping;
void *orig_data;
u32 tx_avail;
u32 offset;
u32 act;
if (!xdp_prog)
return false;
pdev = bp->pdev;
txr = rxr->bnapi->tx_ring;
rx_buf = &rxr->rx_buf_ring[cons];
offset = bp->rx_offset;
/* Describe the packet to the program: rx_offset bytes of headroom sit
* in front of the current data pointer.
*/
xdp.data_hard_start = *data_ptr - offset;
xdp.data = *data_ptr;
xdp.data_end = *data_ptr + *len;
orig_data = xdp.data;
/* mapping + offset is used below as the DMA address of the packet data.
* NOTE(review): presumably rx_buf->mapping already includes
* rx_dma_offset, hence the subtraction - confirm against the rx
* allocation path.
*/
mapping = rx_buf->mapping - bp->rx_dma_offset;
/* Make the packet bytes visible to the CPU before the program runs. */
dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
rcu_read_lock();
act = bpf_prog_run_xdp(xdp_prog, &xdp);
rcu_read_unlock();
tx_avail = bnxt_tx_avail(bp, txr);
/* If the tx ring is not completely free (some BDs are still in flight),
* we must not update the rx producer yet because we may still be
* transmitting on some BDs.
*/
if (tx_avail != bp->tx_ring_size)
*event &= ~BNXT_RX_EVENT;
/* The program moved the start of the packet: report the new data
* pointer and length back to the caller.
*/
if (orig_data != xdp.data) {
offset = xdp.data - xdp.data_hard_start;
*data_ptr = xdp.data_hard_start + offset;
*len = xdp.data_end - xdp.data;
}
switch (act) {
case XDP_PASS:
return false;
case XDP_TX:
/* bnxt_xmit_xdp() uses two BDs per packet; without room for both the
* packet is dropped and the buffer recycled.
*/
if (tx_avail < 2) {
trace_xdp_exception(bp->dev, xdp_prog, act);
bnxt_reuse_rx_data(rxr, cons, page);
return true;
}
/* Note: this overwrites every previously set event bit. */
*event = BNXT_TX_EVENT;
/* Hand the (possibly modified) packet bytes back to the device. */
dma_sync_single_for_device(&pdev->dev, mapping + offset, *len,
bp->rx_dir);
bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
NEXT_RX(rxr->rx_prod));
bnxt_reuse_rx_data(rxr, cons, page);
return true;
default:
bpf_warn_invalid_xdp_action(act);
/* Fall thru */
case XDP_ABORTED:
trace_xdp_exception(bp->dev, xdp_prog, act);
/* Fall thru */
case XDP_DROP:
bnxt_reuse_rx_data(rxr, cons, page);
break;
}
return true;
}
/* Attach (@prog != NULL) or detach (@prog == NULL) an XDP program.
*
* Must be called under rtnl_lock.
*
* Returns 0 on success, -EOPNOTSUPP if the MTU is too large for an attach
* or the rings are not in shared (combined) mode, the error from
* bnxt_reserve_rings() if ring reservation fails, or the result of
* bnxt_open_nic() when the device was running and had to be reopened.
*/
static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
{
struct net_device *dev = bp->dev;
int tx_xdp = 0, rc, tc;
struct bpf_prog *old;
if (prog && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
netdev_warn(dev, "MTU %d larger than largest XDP supported MTU %d.\n",
bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU);
return -EOPNOTSUPP;
}
/* This check applies to both attach and detach requests. */
if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) {
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
}
/* One XDP TX ring per RX ring when attaching; none when detaching. */
if (prog)
tx_xdp = bp->rx_nr_rings;
tc = netdev_get_num_tc(dev);
if (!tc)
tc = 1;
/* Reserve rings before touching any state, so that a failure returns
* with the device and the current program left as they were.
*/
rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
tc, tx_xdp);
if (rc) {
netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n");
return rc;
}
if (netif_running(dev))
bnxt_close_nic(bp, true, false);
/* Install the new program and drop the reference on the old one. */
old = xchg(&bp->xdp_prog, prog);
if (old)
bpf_prog_put(old);
if (prog) {
bnxt_set_rx_skb_mode(bp, true);
} else {
int rx, tx;
bnxt_set_rx_skb_mode(bp, false);
bnxt_get_max_rings(bp, &rx, &tx, true);
/* NOTE(review): presumably page mode turned off aggregation rings and
* LRO; they are re-enabled here when more than one rx ring is
* available - confirm against bnxt_set_rx_skb_mode().
*/
if (rx > 1) {
bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
bp->dev->hw_features |= NETIF_F_LRO;
}
}
/* Recompute ring counts to include (or exclude) the XDP TX rings. */
bp->tx_nr_rings_xdp = tx_xdp;
bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
bp->num_stat_ctxs = bp->cp_nr_rings;
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
if (netif_running(dev))
return bnxt_open_nic(bp, true, false);
return 0;
}
/* XDP command entry point for the netdev.
 *
 * XDP_SETUP_PROG installs or removes a program through bnxt_xdp_set();
 * XDP_QUERY_PROG reports whether a program is currently attached.
 * Any other command yields -EINVAL.
 */
int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
	struct bnxt *bp = netdev_priv(dev);

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return bnxt_xdp_set(bp, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!bp->xdp_prog;
		return 0;
	default:
		return -EINVAL;
	}
}
/* Broadcom NetXtreme-C/E network driver.
*
* Copyright (c) 2016-2017 Broadcom Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
*/
#ifndef BNXT_XDP_H
#define BNXT_XDP_H
/* TX completion handler for XDP TX rings; consumes nr_pkts completed
* packets and rings the RX doorbell.
*/
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
/* Run the attached XDP program on a received packet.  Returns true if the
* packet was consumed by XDP, false if it should be passed to the stack.
*/
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
struct page *page, u8 **data_ptr, unsigned int *len,
u8 *event);
/* Handle XDP_SETUP_PROG / XDP_QUERY_PROG commands for the netdev. */
int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp);
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册