提交 1d233886 编写于 作者: T Toke Høiland-Jørgensen 提交者: Alexei Starovoitov

xdp: Use bulking for non-map XDP_REDIRECT and consolidate code paths

Since the bulk queue used by XDP_REDIRECT now lives in struct net_device,
we can re-use the bulking for the non-map version of the bpf_redirect()
helper. This is a simple matter of having xdp_do_redirect_slow() queue the
frame on the bulk queue instead of sending it out with __bpf_tx_xdp().

Unfortunately we can't make the bpf_redirect() helper return an error if
the ifindex doesn't exit (as bpf_redirect_map() does), because we don't
have a reference to the network namespace of the ingress device at the time
the helper is called. So we have to leave it as-is and keep the device
lookup in xdp_do_redirect_slow().

Since this leaves less reason to have the non-map redirect code in a
separate function, so we get rid of the xdp_do_redirect_slow() function
entirely. This does lose us the tracepoint disambiguation, but fortunately
the xdp_redirect and xdp_redirect_map tracepoints use the same tracepoint
entry structures. This means both can contain a map index, so we can just
amend the tracepoint definitions so we always emit the xdp_redirect(_err)
tracepoints, but with the map ID only populated if a map is present. This
means we retire the xdp_redirect_map(_err) tracepoints entirely, but keep
the definitions around in case someone is still listening for them.

With this change, the performance of the xdp_redirect sample program goes
from 5Mpps to 8.4Mpps (a 68% increase).

Since the flush functions are no longer map-specific, rename the flush()
functions to drop _map from their names. One of the renamed functions is
the xdp_do_flush_map() callback used in all the xdp-enabled drivers. To
keep from having to update all drivers, use a #define to keep the old name
working, and only update the virtual drivers in this patch.
Signed-off-by: NToke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: NAlexei Starovoitov <ast@kernel.org>
Acked-by: NJohn Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/157918768505.1458396.17518057312953572912.stgit@toke.dk
上级 75ccae62
......@@ -1718,7 +1718,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
if (err < 0)
goto err_xdp;
if (err == XDP_REDIRECT)
xdp_do_flush_map();
xdp_do_flush();
if (err != XDP_PASS)
goto out;
......@@ -2549,7 +2549,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
}
if (flush)
xdp_do_flush_map();
xdp_do_flush();
rcu_read_unlock();
local_bh_enable();
......
......@@ -769,7 +769,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
if (xdp_xmit & VETH_XDP_TX)
veth_xdp_flush(rq->dev, &bq);
if (xdp_xmit & VETH_XDP_REDIR)
xdp_do_flush_map();
xdp_do_flush();
xdp_clear_return_frame_no_direct();
return done;
......
......@@ -1432,7 +1432,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
virtqueue_napi_complete(napi, rq->vq, received);
if (xdp_xmit & VIRTIO_XDP_REDIR)
xdp_do_flush_map();
xdp_do_flush();
if (xdp_xmit & VIRTIO_XDP_TX) {
sq = virtnet_xdp_sq(vi);
......
......@@ -1056,7 +1056,9 @@ struct sk_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_flush(void);
void __dev_flush(void);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
......@@ -1169,13 +1171,20 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
return NULL;
}
static inline void __dev_map_flush(void)
static inline void __dev_flush(void)
{
}
struct xdp_buff;
struct bpf_dtab_netdev;
static inline
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
......
......@@ -918,7 +918,7 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
return 0;
}
/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
/* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the
* same cpu context. Further for best results no more than a single map
* for the do_redirect/do_flush pair should be used. This limitation is
* because we only track one map and force a flush when the map changes.
......@@ -929,7 +929,13 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
int xdp_do_redirect(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *prog);
void xdp_do_flush_map(void);
void xdp_do_flush(void);
/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
* it is no longer only flushing maps. Keep this define for compatibility
* until all drivers are updated - do not use xdp_do_flush_map() in new code!
*/
#define xdp_do_flush_map xdp_do_flush
void bpf_warn_invalid_xdp_action(u32 act);
......
......@@ -79,14 +79,26 @@ TRACE_EVENT(xdp_bulk_tx,
__entry->sent, __entry->drops, __entry->err)
);
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(tgt, map) \
(((map->map_type == BPF_MAP_TYPE_DEVMAP || \
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
DECLARE_EVENT_CLASS(xdp_redirect_template,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
TP_ARGS(dev, xdp, tgt, err, map, index),
TP_STRUCT__entry(
__field(int, prog_id)
......@@ -103,90 +115,65 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
__entry->act = XDP_REDIRECT;
__entry->ifindex = dev->ifindex;
__entry->err = err;
__entry->to_ifindex = to_ifindex;
__entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
index;
__entry->map_id = map ? map->id : 0;
__entry->map_index = map_index;
__entry->map_index = map ? index : 0;
),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
" map_id=%d map_index=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_ifindex,
__entry->err)
__entry->err, __entry->map_id, __entry->map_index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
);
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
);
#define _trace_xdp_redirect(dev, xdp, to) \
trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);
trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to);
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);
trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to);
#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
trace_xdp_redirect(dev, xdp, to, 0, map, index);
#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
trace_xdp_redirect_err(dev, xdp, to, err, map, index);
DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
" map_id=%d map_index=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_ifindex,
__entry->err,
__entry->map_id, __entry->map_index)
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
);
DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
" map_id=%d map_index=%d",
__entry->prog_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->ifindex, __entry->to_ifindex,
__entry->err,
__entry->map_id, __entry->map_index)
const void *tgt, int err,
const struct bpf_map *map, u32 index),
TP_ARGS(dev, xdp, tgt, err, map, index)
);
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(fwd, map) \
((map->map_type == BPF_MAP_TYPE_DEVMAP || \
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
0, map, idx)
#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
err, map, idx)
TRACE_EVENT(xdp_cpumap_kthread,
TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
......
......@@ -81,7 +81,7 @@ struct bpf_dtab {
u32 n_buckets;
};
static DEFINE_PER_CPU(struct list_head, dev_map_flush_list);
static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
......@@ -357,16 +357,16 @@ static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
/* __dev_flush is called from xdp_do_flush() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
* from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
* net device can be torn down. On devmap tear down we ensure the flush list
* is empty before completing to ensure all flush operations have completed.
*/
void __dev_map_flush(void)
void __dev_flush(void)
{
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
struct xdp_dev_bulk_queue *bq, *tmp;
rcu_read_lock();
......@@ -396,9 +396,8 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
*/
static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
......@@ -419,10 +418,9 @@ static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
int err;
......@@ -440,6 +438,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return bq_enqueue(dev, xdpf, dev_rx);
}
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return __xdp_enqueue(dev, xdp, dev_rx);
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
return __xdp_enqueue(dev, xdp, dev_rx);
}
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
......@@ -762,7 +774,7 @@ static int __init dev_map_init(void)
register_netdevice_notifier(&dev_map_notifier);
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
return 0;
}
......
......@@ -3458,58 +3458,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
static int __bpf_tx_xdp(struct net_device *dev,
struct bpf_map *map,
struct xdp_buff *xdp,
u32 index)
{
struct xdp_frame *xdpf;
int err, sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
if (unlikely(err))
return err;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
if (sent <= 0)
return sent;
return 0;
}
static noinline int
xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
{
struct net_device *fwd;
u32 index = ri->tgt_index;
int err;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
ri->tgt_index = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
if (unlikely(err))
goto err;
_trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_map *map, struct xdp_buff *xdp)
{
......@@ -3527,13 +3475,13 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
return 0;
}
void xdp_do_flush_map(void)
void xdp_do_flush(void)
{
__dev_map_flush();
__dev_flush();
__cpu_map_flush();
__xsk_map_flush();
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
EXPORT_SYMBOL_GPL(xdp_do_flush);
static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
{
......@@ -3568,10 +3516,11 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct bpf_map *map,
struct bpf_redirect_info *ri)
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->tgt_index;
void *fwd = ri->tgt_value;
int err;
......@@ -3580,7 +3529,18 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
if (unlikely(!map)) {
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
err = dev_xdp_enqueue(fwd, xdp, dev);
} else {
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
}
if (unlikely(err))
goto err;
......@@ -3590,18 +3550,6 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
if (likely(map))
return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册