提交 aa6be2b9 编写于 作者: D David S. Miller

Merge branch 'cpsw-Add-XDP-support'

Ivan Khoronzhuk says:

====================
net: ethernet: ti: cpsw: Add XDP support

This patchset adds XDP support for the TI cpsw driver and bases it on
the page_pool allocator. It was verified on af_xdp socket drop,
af_xdp l2f, ebpf XDP_DROP, XDP_REDIRECT, XDP_PASS, XDP_TX.

It was verified with the following configs enabled:
CONFIG_JIT=y
CONFIG_BPFILTER=y
CONFIG_BPF_SYSCALL=y
CONFIG_XDP_SOCKETS=y
CONFIG_BPF_EVENTS=y
CONFIG_HAVE_EBPF_JIT=y
CONFIG_BPF_JIT=y
CONFIG_CGROUP_BPF=y

Link to previous v7:
https://lkml.org/lkml/2019/7/4/715

Also regular tests with iperf2 were done in order to verify impact on
regular netstack performance, compared with base commit:
https://pastebin.com/JSMT0iZ4

v8..v9:
- fix warnings on arm64 caused by typos in type casting

v7..v8:
- corrected dma calculation based on headroom instead of hard start
- minor comment changes

v6..v7:
- rolled back to v4 solution but with small modification
- picked up patch:
  https://www.spinics.net/lists/netdev/msg583145.html
- added changes related to netsec fix and cpsw

v5..v6:
- make changes so that rx_dev is kept the same during the redirect/flush cycle
- dropped net: ethernet: ti: davinci_cpdma: return handler status
- other changes are described in the patches

v4..v5:
- added two preliminary patches:
  net: ethernet: ti: davinci_cpdma: allow desc split while down
  net: ethernet: ti: cpsw_ethtool: allow res split while down
- added xdp allocator refcnt on xdp level, avoiding page pool refcnt
- moved flush status as separate argument for cpdma_chan_process
- reworked cpsw code according to last changes to allocator
- added missing statistic counter

v3..v4:
- added page pool user counter
- use same pool for ndevs in dual mac
- restructured page pool create/destroy according to the last changes in API

v2..v3:
- each rxq and ndev has its own page pool

v1..v2:
- combined xdp_xmit functions
- used page allocation w/o refcnt juggle
- unmapped page for skb netstack
- moved rxq/page pool allocation to open/close pair
- added several preliminary patches:
  net: page_pool: add helper function to retrieve dma addresses
  net: page_pool: add helper function to unmap dma addresses
  net: ethernet: ti: cpsw: use cpsw as drv data
  net: ethernet: ti: cpsw_ethtool: simplify slave loops
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -577,8 +577,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_PAGE_POOL, rq->page_pool);
if (err)
page_pool_free(rq->page_pool);
}
if (err)
goto err_free;
......@@ -646,6 +644,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
xdp_rxq_info_unreg(&rq->xdp_rxq);
page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
......@@ -680,6 +679,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
page_pool_destroy(rq->page_pool);
mlx5_wq_destroy(&rq->wq_ctrl);
}
......
......@@ -1212,15 +1212,11 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
}
}
/* Rx is currently using page_pool
* since the pool is created during netsec_setup_rx_dring(), we need to
* free the pool manually if the registration failed
*/
/* Rx is currently using page_pool */
if (id == NETSEC_RING_RX) {
if (xdp_rxq_info_is_reg(&dring->xdp_rxq))
xdp_rxq_info_unreg(&dring->xdp_rxq);
else
page_pool_free(dring->page_pool);
page_pool_destroy(dring->page_pool);
}
memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM);
......
......@@ -50,6 +50,7 @@ config TI_CPSW
depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
select TI_DAVINCI_MDIO
select MFD_SYSCON
select PAGE_POOL
select REGMAP
---help---
This driver supports TI's CPSW Ethernet Switch.
......
......@@ -31,6 +31,10 @@
#include <linux/if_vlan.h>
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
#include <net/page_pool.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <linux/pinctrl/consumer.h>
#include <net/pkt_cls.h>
......@@ -60,6 +64,10 @@ static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT;
module_param(descs_pool_size, int, 0444);
MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
/* The buf includes headroom compatible with both skb and xdpf */
#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN)
#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long))
#define for_each_slave(priv, func, arg...) \
do { \
struct cpsw_slave *slave; \
......@@ -74,6 +82,11 @@ MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
(func)(slave++, ##arg); \
} while (0)
#define CPSW_XMETA_OFFSET ALIGN(sizeof(struct xdp_frame), sizeof(long))
#define CPSW_XDP_CONSUMED 1
#define CPSW_XDP_PASS 0
static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
__be16 proto, u16 vid);
......@@ -337,24 +350,58 @@ void cpsw_intr_disable(struct cpsw_common *cpsw)
return;
}
/* Tell whether a completion token carries the xdp_frame tag.
 *
 * Returns non-zero when bit 0 of @handle is set, which is the bit
 * cpsw_xdpf_to_handle() ORs into an xdp_frame pointer; zero otherwise.
 */
static int cpsw_is_xdpf_handle(void *handle)
{
	unsigned long raw = (unsigned long)handle;

	return raw & BIT(0);
}
/* Turn an xdp_frame pointer into a tagged token by setting bit 0, so that
 * cpsw_is_xdpf_handle() can later recognise it.
 */
static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf)
{
	unsigned long tagged = (unsigned long)xdpf;

	tagged |= BIT(0);

	return (void *)tagged;
}
/* Recover the xdp_frame pointer from a tagged token by clearing bit 0
 * (the inverse of cpsw_xdpf_to_handle()).
 */
static struct xdp_frame *cpsw_handle_to_xdpf(void *handle)
{
	unsigned long raw = (unsigned long)handle;

	raw &= ~BIT(0);

	return (struct xdp_frame *)raw;
}
/* Driver metadata stored inside a buffer at CPSW_XMETA_OFFSET from its
 * start. The submit paths fill it in and the completion handlers read it
 * back to learn which net device and channel the buffer belongs to.
 */
struct __aligned(sizeof(long)) cpsw_meta_xdp {
struct net_device *ndev; /* net device the buffer was submitted for */
int ch; /* channel index the buffer was submitted on */
};
void cpsw_tx_handler(void *token, int len, int status)
{
struct cpsw_meta_xdp *xmeta;
struct xdp_frame *xdpf;
struct net_device *ndev;
struct netdev_queue *txq;
struct sk_buff *skb = token;
struct net_device *ndev = skb->dev;
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
struct sk_buff *skb;
int ch;
if (cpsw_is_xdpf_handle(token)) {
xdpf = cpsw_handle_to_xdpf(token);
xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
ndev = xmeta->ndev;
ch = xmeta->ch;
xdp_return_frame(xdpf);
} else {
skb = token;
ndev = skb->dev;
ch = skb_get_queue_mapping(skb);
cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb);
dev_kfree_skb_any(skb);
}
/* Check whether the queue is stopped due to stalled tx dma, if the
* queue is stopped then start the queue as we have free desc for tx
*/
txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
txq = netdev_get_tx_queue(ndev, ch);
if (unlikely(netif_tx_queue_stopped(txq)))
netif_tx_wake_queue(txq);
cpts_tx_timestamp(cpsw->cpts, skb);
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += len;
dev_kfree_skb_any(skb);
}
static void cpsw_rx_vlan_encap(struct sk_buff *skb)
......@@ -400,24 +447,252 @@ static void cpsw_rx_vlan_encap(struct sk_buff *skb)
}
}
/* Queue one XDP frame for transmission on tx channel 0.
 *
 * @priv: private data of the net device the frame is sent from
 * @xdpf: frame to send
 * @page: page_pool page backing the frame, or NULL when the frame memory
 *        is not one of this driver's pool pages (ndo_xdp_xmit path)
 *
 * With @page set, the DMA address is taken from the page pool and offset
 * past the xdp_frame header and the frame headroom, and the buffer is
 * submitted as already mapped. With @page NULL, the frame data is handed
 * to cpdma_chan_submit(), which maps it.
 *
 * Returns the cpdma submit result, or -EINVAL when a frame without a pool
 * page has too little headroom for the driver metadata. On any non-zero
 * return the frame has already been given back with
 * xdp_return_frame_rx_napi().
 */
static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf,
			     struct page *page)
{
	struct cpsw_common *cpsw = priv->cpsw;
	struct cpsw_meta_xdp *xmeta;
	struct cpdma_chan *txch;
	dma_addr_t dma;
	int ret, port;

	/* The metadata lives in the frame headroom. For frames that do not
	 * come from our own pool, make sure it fits BEFORE writing it, so a
	 * frame with too little headroom is rejected untouched instead of
	 * having its contents overwritten first.
	 */
	if (!page && sizeof(*xmeta) > xdpf->headroom) {
		xdp_return_frame_rx_napi(xdpf);
		return -EINVAL;
	}

	/* Record ndev/channel so the tx completion handler can find them */
	xmeta = (void *)xdpf + CPSW_XMETA_OFFSET;
	xmeta->ndev = priv->ndev;
	xmeta->ch = 0;
	txch = cpsw->txv[0].ch;

	port = priv->emac_port + cpsw->data.dual_emac;
	if (page) {
		dma = page_pool_get_dma_addr(page);
		dma += xdpf->headroom + sizeof(struct xdp_frame);
		ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf),
					       dma, xdpf->len, port);
	} else {
		ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf),
					xdpf->data, xdpf->len, port);
	}

	if (ret) {
		priv->ndev->stats.tx_dropped++;
		xdp_return_frame_rx_napi(xdpf);
	}

	return ret;
}
/* Run the attached XDP program, if any, on one received buffer.
 *
 * @priv: private data of the receiving net device
 * @ch:   rx channel index, used to pick the page pool on the drop path
 * @xdp:  xdp_buff describing the received data
 * @page: page backing @xdp
 *
 * Returns CPSW_XDP_PASS when no program is attached or the program
 * returned XDP_PASS; in every other case returns CPSW_XDP_CONSUMED.
 * On the drop path the page is recycled into cpsw->page_pool[ch].
 */
static int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
struct page *page)
{
struct cpsw_common *cpsw = priv->cpsw;
struct net_device *ndev = priv->ndev;
int ret = CPSW_XDP_CONSUMED;
struct xdp_frame *xdpf;
struct bpf_prog *prog;
u32 act;
rcu_read_lock();
prog = READ_ONCE(priv->xdp_prog);
if (!prog) {
/* no program attached: the caller keeps the buffer */
ret = CPSW_XDP_PASS;
goto out;
}
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
ret = CPSW_XDP_PASS;
break;
case XDP_TX:
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
goto drop;
/* result not checked here: on failure cpsw_xdp_tx_frame() returns the
 * frame itself
 */
cpsw_xdp_tx_frame(priv, xdpf, page);
break;
case XDP_REDIRECT:
if (xdp_do_redirect(ndev, xdp, prog))
goto drop;
/* Have to flush here, per packet, instead of doing it in bulk
 * at the end of the napi handler. The RX devices on this
 * particular hardware share a common queue, so the
 * incoming device might change per packet.
 */
xdp_do_flush_map();
break;
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
/* fall through -- handle aborts by dropping packet */
case XDP_DROP:
goto drop;
}
out:
rcu_read_unlock();
return ret;
drop:
rcu_read_unlock();
/* give the page back to the pool of the channel it was received on */
page_pool_recycle_direct(cpsw->page_pool[ch], page);
return ret;
}
/* Compute the buffer size needed for a packet of @len bytes: the packet
 * plus CPSW_HEADROOM plus an aligned struct skb_shared_info, with the sum
 * rounded up by SKB_DATA_ALIGN().
 */
static unsigned int cpsw_rxbuf_total_len(unsigned int len)
{
	unsigned int total = len + CPSW_HEADROOM;

	total += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return SKB_DATA_ALIGN(total);
}
static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw,
int size)
{
struct page_pool_params pp_params;
struct page_pool *pool;
pp_params.order = 0;
pp_params.flags = PP_FLAG_DMA_MAP;
pp_params.pool_size = size;
pp_params.nid = NUMA_NO_NODE;
pp_params.dma_dir = DMA_BIDIRECTIONAL;
pp_params.dev = cpsw->dev;
pool = page_pool_create(&pp_params);
if (IS_ERR(pool))
dev_err(cpsw->dev, "cannot create rx page pool\n");
return pool;
}
/* Register the XDP rx queue info of one net device for channel @ch and
 * attach the channel's page pool to it as its memory model.
 *
 * Returns 0 on success or the error from the failing registration call;
 * if attaching the memory model fails, the rxq registration is undone.
 */
static int cpsw_ndev_create_xdp_rxq(struct cpsw_priv *priv, int ch)
{
	struct page_pool *ch_pool = priv->cpsw->page_pool[ch];
	struct xdp_rxq_info *rxq_info = &priv->xdp_rxq[ch];
	int err;

	err = xdp_rxq_info_reg(rxq_info, priv->ndev, ch);
	if (err)
		return err;

	err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_POOL, ch_pool);
	if (!err)
		return 0;

	xdp_rxq_info_unreg(rxq_info);

	return err;
}
/* Unregister the XDP rx queue info of one net device for channel @ch.
 * Does nothing when that rxq is not currently registered.
 */
static void cpsw_ndev_destroy_xdp_rxq(struct cpsw_priv *priv, int ch)
{
	struct xdp_rxq_info *rxq_info = &priv->xdp_rxq[ch];

	if (xdp_rxq_info_is_reg(rxq_info))
		xdp_rxq_info_unreg(rxq_info);
}
/* Create the page pool for rx channel @ch, sized to the number of rx
 * buffers reported for that channel, and store it in cpsw->page_pool[ch].
 *
 * Returns 0 on success or the error encoded in the pool pointer; on
 * failure cpsw->page_pool[ch] is left untouched.
 */
static int cpsw_create_rx_pool(struct cpsw_common *cpsw, int ch)
{
	int nbufs = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
	struct page_pool *new_pool;

	new_pool = cpsw_create_page_pool(cpsw, nbufs);
	if (IS_ERR(new_pool))
		return PTR_ERR(new_pool);

	cpsw->page_pool[ch] = new_pool;

	return 0;
}
/* For every rx channel: unregister the XDP rxq of each existing slave net
 * device, then destroy the channel's page pool and clear its slot.
 */
void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw)
{
	int ch;

	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
		int slave;

		for (slave = 0; slave < cpsw->data.slaves; slave++) {
			struct net_device *slave_ndev = cpsw->slaves[slave].ndev;

			if (slave_ndev)
				cpsw_ndev_destroy_xdp_rxq(netdev_priv(slave_ndev),
							  ch);
		}

		page_pool_destroy(cpsw->page_pool[ch]);
		cpsw->page_pool[ch] = NULL;
	}
}
/* For every rx channel: create its page pool, then register an XDP rxq on
 * that channel for each existing slave net device.
 *
 * Returns 0 on success. On the first failure everything is torn down with
 * cpsw_destroy_xdp_rxqs() and the error is returned.
 */
int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw)
{
	int ch, slave, err;

	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
		err = cpsw_create_rx_pool(cpsw, ch);
		if (err)
			goto undo;

		/* using same page pool is allowed as no running rx handlers
		 * simultaneously for both ndevs
		 */
		for (slave = 0; slave < cpsw->data.slaves; slave++) {
			struct net_device *slave_ndev = cpsw->slaves[slave].ndev;

			if (!slave_ndev)
				continue;

			err = cpsw_ndev_create_xdp_rxq(netdev_priv(slave_ndev),
						       ch);
			if (err)
				goto undo;
		}
	}

	return 0;

undo:
	cpsw_destroy_xdp_rxqs(cpsw);

	return err;
}
static void cpsw_rx_handler(void *token, int len, int status)
{
struct cpdma_chan *ch;
struct sk_buff *skb = token;
struct sk_buff *new_skb;
struct net_device *ndev = skb->dev;
int ret = 0, port;
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
struct page *new_page, *page = token;
void *pa = page_address(page);
struct cpsw_meta_xdp *xmeta = pa + CPSW_XMETA_OFFSET;
struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev);
int pkt_size = cpsw->rx_packet_max;
int ret = 0, port, ch = xmeta->ch;
int headroom = CPSW_HEADROOM;
struct net_device *ndev = xmeta->ndev;
struct cpsw_priv *priv;
struct page_pool *pool;
struct sk_buff *skb;
struct xdp_buff xdp;
dma_addr_t dma;
if (cpsw->data.dual_emac) {
if (cpsw->data.dual_emac && status >= 0) {
port = CPDMA_RX_SOURCE_PORT(status);
if (port) {
if (port)
ndev = cpsw->slaves[--port].ndev;
skb->dev = ndev;
}
}
priv = netdev_priv(ndev);
pool = cpsw->page_pool[ch];
if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
/* In dual emac mode check for all interfaces */
if (cpsw->data.dual_emac && cpsw->usage_count &&
......@@ -426,43 +701,87 @@ static void cpsw_rx_handler(void *token, int len, int status)
* is already down and the other interface is up
* and running, instead of freeing which results
* in reducing of the number of rx descriptor in
* DMA engine, requeue skb back to cpdma.
* DMA engine, requeue page back to cpdma.
*/
new_skb = skb;
new_page = page;
goto requeue;
}
/* the interface is going down, skbs are purged */
dev_kfree_skb_any(skb);
/* the interface is going down, pages are purged */
page_pool_recycle_direct(pool, page);
return;
}
new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
if (new_skb) {
skb_copy_queue_mapping(new_skb, skb);
new_page = page_pool_dev_alloc_pages(pool);
if (unlikely(!new_page)) {
new_page = page;
ndev->stats.rx_dropped++;
goto requeue;
}
if (priv->xdp_prog) {
if (status & CPDMA_RX_VLAN_ENCAP) {
xdp.data = pa + CPSW_HEADROOM +
CPSW_RX_VLAN_ENCAP_HDR_SIZE;
xdp.data_end = xdp.data + len -
CPSW_RX_VLAN_ENCAP_HDR_SIZE;
} else {
xdp.data = pa + CPSW_HEADROOM;
xdp.data_end = xdp.data + len;
}
xdp_set_data_meta_invalid(&xdp);
xdp.data_hard_start = pa;
xdp.rxq = &priv->xdp_rxq[ch];
ret = cpsw_run_xdp(priv, ch, &xdp, page);
if (ret != CPSW_XDP_PASS)
goto requeue;
/* XDP prog might have changed packet data and boundaries */
len = xdp.data_end - xdp.data;
headroom = xdp.data - xdp.data_hard_start;
/* XDP prog can modify vlan tag, so can't use encap header */
status &= ~CPDMA_RX_VLAN_ENCAP;
}
/* pass skb to netstack if no XDP prog or returned XDP_PASS */
skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size));
if (!skb) {
ndev->stats.rx_dropped++;
page_pool_recycle_direct(pool, page);
goto requeue;
}
skb_reserve(skb, headroom);
skb_put(skb, len);
skb->dev = ndev;
if (status & CPDMA_RX_VLAN_ENCAP)
cpsw_rx_vlan_encap(skb);
priv = netdev_priv(ndev);
if (priv->rx_ts_enabled)
cpts_rx_timestamp(cpsw->cpts, skb);
skb->protocol = eth_type_trans(skb, ndev);
/* unmap page as no netstack skb page recycling */
page_pool_release_page(pool, page);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
ndev->stats.rx_packets++;
kmemleak_not_leak(new_skb);
} else {
ndev->stats.rx_dropped++;
new_skb = skb;
}
requeue:
ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch;
ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
skb_tailroom(new_skb), 0);
xmeta = page_address(new_page) + CPSW_XMETA_OFFSET;
xmeta->ndev = ndev;
xmeta->ch = ch;
dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
pkt_size, 0);
if (ret < 0) {
WARN_ON(ret == -ENOMEM);
dev_kfree_skb_any(new_skb);
page_pool_recycle_direct(pool, new_page);
}
}
......@@ -1032,33 +1351,39 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
int cpsw_fill_rx_channels(struct cpsw_priv *priv)
{
struct cpsw_common *cpsw = priv->cpsw;
struct sk_buff *skb;
struct cpsw_meta_xdp *xmeta;
struct page_pool *pool;
struct page *page;
int ch_buf_num;
int ch, i, ret;
dma_addr_t dma;
for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
pool = cpsw->page_pool[ch];
ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
for (i = 0; i < ch_buf_num; i++) {
skb = __netdev_alloc_skb_ip_align(priv->ndev,
cpsw->rx_packet_max,
GFP_KERNEL);
if (!skb) {
cpsw_err(priv, ifup, "cannot allocate skb\n");
page = page_pool_dev_alloc_pages(pool);
if (!page) {
cpsw_err(priv, ifup, "allocate rx page err\n");
return -ENOMEM;
}
skb_set_queue_mapping(skb, ch);
ret = cpdma_chan_idle_submit(cpsw->rxv[ch].ch, skb,
skb->data,
skb_tailroom(skb), 0);
xmeta = page_address(page) + CPSW_XMETA_OFFSET;
xmeta->ndev = priv->ndev;
xmeta->ch = ch;
dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
page, dma,
cpsw->rx_packet_max,
0);
if (ret < 0) {
cpsw_err(priv, ifup,
"cannot submit skb to channel %d rx, error %d\n",
"cannot submit page to channel %d rx, error %d\n",
ch, ret);
kfree_skb(skb);
page_pool_recycle_direct(pool, page);
return ret;
}
kmemleak_not_leak(skb);
}
cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n",
......@@ -1394,6 +1719,13 @@ static int cpsw_ndo_open(struct net_device *ndev)
enable_irq(cpsw->irqs_table[0]);
}
/* create rxqs for both infs in dual mac as they use same pool
* and must be destroyed together when no users.
*/
ret = cpsw_create_xdp_rxqs(cpsw);
if (ret < 0)
goto err_cleanup;
ret = cpsw_fill_rx_channels(priv);
if (ret < 0)
goto err_cleanup;
......@@ -1422,9 +1754,10 @@ static int cpsw_ndo_open(struct net_device *ndev)
err_cleanup:
if (!cpsw->usage_count) {
cpdma_ctlr_stop(cpsw->dma);
for_each_slave(priv, cpsw_slave_stop, cpsw);
cpsw_destroy_xdp_rxqs(cpsw);
}
for_each_slave(priv, cpsw_slave_stop, cpsw);
pm_runtime_put_sync(cpsw->dev);
netif_carrier_off(priv->ndev);
return ret;
......@@ -1447,6 +1780,7 @@ static int cpsw_ndo_stop(struct net_device *ndev)
cpsw_intr_disable(cpsw);
cpdma_ctlr_stop(cpsw->dma);
cpsw_ale_stop(cpsw->ale);
cpsw_destroy_xdp_rxqs(cpsw);
}
for_each_slave(priv, cpsw_slave_stop, cpsw);
......@@ -2004,6 +2338,64 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
}
}
/* Attach, replace or detach the XDP program of one net device.
 *
 * Returns 0 when there is nothing to do (no program attached and none
 * requested) or after the program pointer and attachment info were
 * updated; -EBUSY when xdp_attachment_flags_ok() rejects the request.
 */
static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf)
{
	struct bpf_prog *new_prog = bpf->prog;

	if (priv->xdpi.prog || new_prog) {
		if (!xdp_attachment_flags_ok(&priv->xdpi, bpf))
			return -EBUSY;

		WRITE_ONCE(priv->xdp_prog, new_prog);

		xdp_attachment_setup(&priv->xdpi, bpf);
	}

	return 0;
}
/* ndo_bpf callback: dispatch on the requested command.
 *
 * XDP_SETUP_PROG is handled by cpsw_xdp_prog_setup(), XDP_QUERY_PROG by
 * xdp_attachment_query(); any other command yields -EINVAL.
 */
static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
{
	struct cpsw_priv *priv = netdev_priv(ndev);

	if (bpf->command == XDP_SETUP_PROG)
		return cpsw_xdp_prog_setup(priv, bpf);

	if (bpf->command == XDP_QUERY_PROG)
		return xdp_attachment_query(&priv->xdpi, bpf);

	return -EINVAL;
}
/* ndo_xdp_xmit callback: try to transmit @n frames from @frames.
 *
 * Frames shorter than CPSW_MIN_PACKET_SIZE are returned and counted as
 * dropped; so are frames cpsw_xdp_tx_frame() fails to queue.
 *
 * Returns -EINVAL when @flags has bits outside XDP_XMIT_FLAGS_MASK,
 * otherwise @n minus the number of dropped frames.
 */
static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
			     struct xdp_frame **frames, u32 flags)
{
	struct cpsw_priv *priv = netdev_priv(ndev);
	int idx, dropped = 0;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	for (idx = 0; idx < n; idx++) {
		struct xdp_frame *frame = frames[idx];

		if (frame->len < CPSW_MIN_PACKET_SIZE) {
			xdp_return_frame_rx_napi(frame);
			dropped++;
		} else if (cpsw_xdp_tx_frame(priv, frame, NULL)) {
			dropped++;
		}
	}

	return n - dropped;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void cpsw_ndo_poll_controller(struct net_device *ndev)
{
......@@ -2032,6 +2424,8 @@ static const struct net_device_ops cpsw_netdev_ops = {
.ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid,
.ndo_setup_tc = cpsw_ndo_setup_tc,
.ndo_bpf = cpsw_ndo_bpf,
.ndo_xdp_xmit = cpsw_ndo_xdp_xmit,
};
static void cpsw_get_drvinfo(struct net_device *ndev,
......
......@@ -578,6 +578,18 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx,
return 0;
}
/* Close every slave net device that exists. */
static void cpsw_fail(struct cpsw_common *cpsw)
{
	int slave;

	for (slave = 0; slave < cpsw->data.slaves; slave++) {
		struct net_device *slave_ndev = cpsw->slaves[slave].ndev;

		if (!slave_ndev)
			continue;

		dev_close(slave_ndev);
	}
}
int cpsw_set_channels_common(struct net_device *ndev,
struct ethtool_channels *chs,
cpdma_handler_fn rx_handler)
......@@ -585,7 +597,7 @@ int cpsw_set_channels_common(struct net_device *ndev,
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
struct net_device *sl_ndev;
int i, ret;
int i, new_pools, ret;
ret = cpsw_check_ch_settings(cpsw, chs);
if (ret < 0)
......@@ -593,6 +605,8 @@ int cpsw_set_channels_common(struct net_device *ndev,
cpsw_suspend_data_pass(ndev);
new_pools = (chs->rx_count != cpsw->rx_ch_num) && cpsw->usage_count;
ret = cpsw_update_channels_res(priv, chs->rx_count, 1, rx_handler);
if (ret)
goto err;
......@@ -620,15 +634,21 @@ int cpsw_set_channels_common(struct net_device *ndev,
}
}
if (cpsw->usage_count)
cpsw_split_res(cpsw);
if (new_pools) {
cpsw_destroy_xdp_rxqs(cpsw);
ret = cpsw_create_xdp_rxqs(cpsw);
if (ret)
goto err;
}
ret = cpsw_resume_data_pass(ndev);
if (!ret)
return 0;
err:
dev_err(priv->dev, "cannot update channels number, closing device\n");
dev_close(ndev);
cpsw_fail(cpsw);
return ret;
}
......@@ -648,9 +668,8 @@ void cpsw_get_ringparam(struct net_device *ndev,
int cpsw_set_ringparam(struct net_device *ndev,
struct ethtool_ringparam *ering)
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
int ret;
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
int descs_num, ret;
/* ignore ering->tx_pending - only rx_pending adjustment is supported */
......@@ -659,22 +678,34 @@ int cpsw_set_ringparam(struct net_device *ndev,
ering->rx_pending > (cpsw->descs_pool_size - CPSW_MAX_QUEUES))
return -EINVAL;
if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma))
descs_num = cpdma_get_num_rx_descs(cpsw->dma);
if (ering->rx_pending == descs_num)
return 0;
cpsw_suspend_data_pass(ndev);
cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
if (ret) {
if (cpsw_resume_data_pass(ndev))
goto err;
return ret;
}
if (cpsw->usage_count)
cpdma_chan_split_pool(cpsw->dma);
if (cpsw->usage_count) {
cpsw_destroy_xdp_rxqs(cpsw);
ret = cpsw_create_xdp_rxqs(cpsw);
if (ret)
goto err;
}
ret = cpsw_resume_data_pass(ndev);
if (!ret)
return 0;
err:
cpdma_set_num_rx_descs(cpsw->dma, descs_num);
dev_err(cpsw->dev, "cannot set ring params, closing device\n");
dev_close(ndev);
cpsw_fail(cpsw);
return ret;
}
......
......@@ -346,6 +346,7 @@ struct cpsw_common {
int rx_ch_num, tx_ch_num;
int speed;
int usage_count;
struct page_pool *page_pool[CPSW_MAX_QUEUES];
};
struct cpsw_priv {
......@@ -360,6 +361,10 @@ struct cpsw_priv {
int shp_cfg_speed;
int tx_ts_enabled;
int rx_ts_enabled;
struct bpf_prog *xdp_prog;
struct xdp_rxq_info xdp_rxq[CPSW_MAX_QUEUES];
struct xdp_attachment_info xdpi;
u32 emac_port;
struct cpsw_common *cpsw;
};
......@@ -391,6 +396,8 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv);
void cpsw_intr_enable(struct cpsw_common *cpsw);
void cpsw_intr_disable(struct cpsw_common *cpsw);
void cpsw_tx_handler(void *token, int len, int status);
int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw);
void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw);
/* ethtool */
u32 cpsw_get_msglevel(struct net_device *ndev);
......
......@@ -139,6 +139,7 @@ struct submit_info {
int directed;
void *token;
void *data;
int flags;
int len;
};
......@@ -184,6 +185,8 @@ static struct cpdma_control_info controls[] = {
(directed << CPDMA_TO_PORT_SHIFT)); \
} while (0)
#define CPDMA_DMA_EXT_MAP BIT(16)
static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
{
struct cpdma_desc_pool *pool = ctlr->pool;
......@@ -1015,6 +1018,7 @@ static int cpdma_chan_submit_si(struct submit_info *si)
struct cpdma_chan *chan = si->chan;
struct cpdma_ctlr *ctlr = chan->ctlr;
int len = si->len;
int swlen = len;
struct cpdma_desc __iomem *desc;
dma_addr_t buffer;
u32 mode;
......@@ -1036,15 +1040,21 @@ static int cpdma_chan_submit_si(struct submit_info *si)
chan->stats.runt_transmit_buff++;
}
mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
cpdma_desc_to_port(chan, mode, si->directed);
if (si->flags & CPDMA_DMA_EXT_MAP) {
buffer = (dma_addr_t)si->data;
dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir);
swlen |= CPDMA_DMA_EXT_MAP;
} else {
buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir);
ret = dma_mapping_error(ctlr->dev, buffer);
if (ret) {
cpdma_desc_free(ctlr->pool, desc, 1);
return -EINVAL;
}
mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
cpdma_desc_to_port(chan, mode, si->directed);
}
/* Relaxed IO accessors can be used here as there is read barrier
* at the end of write sequence.
......@@ -1055,7 +1065,7 @@ static int cpdma_chan_submit_si(struct submit_info *si)
writel_relaxed(mode | len, &desc->hw_mode);
writel_relaxed((uintptr_t)si->token, &desc->sw_token);
writel_relaxed(buffer, &desc->sw_buffer);
writel_relaxed(len, &desc->sw_len);
writel_relaxed(swlen, &desc->sw_len);
desc_read(desc, sw_len);
__cpdma_chan_submit(chan, desc);
......@@ -1079,6 +1089,32 @@ int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data,
si.data = data;
si.len = len;
si.directed = directed;
si.flags = 0;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state == CPDMA_STATE_TEARDOWN) {
spin_unlock_irqrestore(&chan->lock, flags);
return -EINVAL;
}
ret = cpdma_chan_submit_si(&si);
spin_unlock_irqrestore(&chan->lock, flags);
return ret;
}
int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token,
dma_addr_t data, int len, int directed)
{
struct submit_info si;
unsigned long flags;
int ret;
si.chan = chan;
si.token = token;
si.data = (void *)data;
si.len = len;
si.directed = directed;
si.flags = CPDMA_DMA_EXT_MAP;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state == CPDMA_STATE_TEARDOWN) {
......@@ -1103,6 +1139,32 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
si.data = data;
si.len = len;
si.directed = directed;
si.flags = 0;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state != CPDMA_STATE_ACTIVE) {
spin_unlock_irqrestore(&chan->lock, flags);
return -EINVAL;
}
ret = cpdma_chan_submit_si(&si);
spin_unlock_irqrestore(&chan->lock, flags);
return ret;
}
int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token,
dma_addr_t data, int len, int directed)
{
struct submit_info si;
unsigned long flags;
int ret;
si.chan = chan;
si.token = token;
si.data = (void *)data;
si.len = len;
si.directed = directed;
si.flags = CPDMA_DMA_EXT_MAP;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state != CPDMA_STATE_ACTIVE) {
......@@ -1140,10 +1202,17 @@ static void __cpdma_chan_free(struct cpdma_chan *chan,
uintptr_t token;
token = desc_read(desc, sw_token);
buff_dma = desc_read(desc, sw_buffer);
origlen = desc_read(desc, sw_len);
buff_dma = desc_read(desc, sw_buffer);
if (origlen & CPDMA_DMA_EXT_MAP) {
origlen &= ~CPDMA_DMA_EXT_MAP;
dma_sync_single_for_cpu(ctlr->dev, buff_dma, origlen,
chan->dir);
} else {
dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir);
}
cpdma_desc_free(pool, desc, 1);
(*chan->handler)((void *)token, outlen, status);
}
......@@ -1354,8 +1423,23 @@ int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr)
return ctlr->num_tx_desc;
}
void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc)
int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc)
{
unsigned long flags;
int temp, ret;
spin_lock_irqsave(&ctlr->lock, flags);
temp = ctlr->num_rx_desc;
ctlr->num_rx_desc = num_rx_desc;
ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc;
ret = cpdma_chan_split_pool(ctlr);
if (ret) {
ctlr->num_rx_desc = temp;
ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc;
}
spin_unlock_irqrestore(&ctlr->lock, flags);
return ret;
}
......@@ -77,8 +77,12 @@ int cpdma_chan_stop(struct cpdma_chan *chan);
int cpdma_chan_get_stats(struct cpdma_chan *chan,
struct cpdma_chan_stats *stats);
int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token,
dma_addr_t data, int len, int directed);
int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
int len, int directed);
int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token,
dma_addr_t data, int len, int directed);
int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data,
int len, int directed);
int cpdma_chan_process(struct cpdma_chan *chan, int quota);
......@@ -112,8 +116,7 @@ enum cpdma_control {
int cpdma_control_get(struct cpdma_ctlr *ctlr, int control);
int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value);
int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr);
void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc);
int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc);
int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr);
int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr);
#endif
......@@ -101,6 +101,12 @@ struct page_pool {
struct ptr_ring ring;
atomic_t pages_state_release_cnt;
/* A page_pool is strictly tied to a single RX-queue being
* protected by NAPI, due to above pp_alloc_cache. This
* refcnt's purpose is to simplify drivers' error handling.
*/
refcount_t user_cnt;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
......@@ -134,6 +140,15 @@ static inline void page_pool_free(struct page_pool *pool)
#endif
}
/* Driver-facing teardown helper, to be used instead of page_pool_free():
 * a NULL @pool is accepted and ignored, anything else is passed on to
 * page_pool_free().
 */
static inline void page_pool_destroy(struct page_pool *pool)
{
	if (pool)
		page_pool_free(pool);
}
/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);
......@@ -201,4 +216,14 @@ static inline bool is_page_pool_compiled_in(void)
#endif
}
static inline void page_pool_get(struct page_pool *pool)
{
refcount_inc(&pool->user_cnt);
}
/* Drop one user reference on @pool.
 *
 * Returns the result of refcount_dec_and_test() on pool->user_cnt, i.e.
 * true when this call released the last reference.
 */
static inline bool page_pool_put(struct page_pool *pool)
{
	bool last_user = refcount_dec_and_test(&pool->user_cnt);

	return last_user;
}
#endif /* _NET_PAGE_POOL_H */
......@@ -49,6 +49,9 @@ static int page_pool_init(struct page_pool *pool,
atomic_set(&pool->pages_state_release_cnt, 0);
/* A driver calling page_pool_create() must also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
if (pool->p.flags & PP_FLAG_DMA_MAP)
get_device(pool->p.dev);
......@@ -70,6 +73,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
kfree(pool);
return ERR_PTR(err);
}
return pool;
}
EXPORT_SYMBOL(page_pool_create);
......@@ -356,6 +360,10 @@ static void __warn_in_flight(struct page_pool *pool)
void __page_pool_free(struct page_pool *pool)
{
/* Only last user actually free/release resources */
if (!page_pool_put(pool))
return;
WARN(pool->alloc.count, "API usage violation");
WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
......
......@@ -370,6 +370,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
goto err;
}
if (type == MEM_TYPE_PAGE_POOL)
page_pool_get(xdp_alloc->page_pool);
mutex_unlock(&mem_id_lock);
trace_mem_connect(xdp_alloc, xdp_rxq);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册