提交 f5836ca5 编写于 作者: J Jesper Dangaard Brouer 提交者: David S. Miller

xdp: separate xdp_redirect tracepoint in error case

There is a need to separate the xdp_redirect tracepoint into two
tracepoints, for separating the error case from the normal forward
case.

Due to the extreme speeds XDP is operating at, loading a tracepoint
has a measurable impact.  Single core XDP REDIRECT (ethtool tuned
rx-usecs 25) can do 13.7 Mpps forwarding, but loading a simple
bpf_prog at the tracepoint (with a return 0) reduces perf to 10.2 Mpps
(CPU E5-1650 v4 @ 3.60GHz, driver: ixgbe)

The overhead of loading a bpf-based tracepoint can be calculated to
cost 25 nanosec ((1/13782002-1/10267937)*10^9 = -24.83 ns).

Using perf record on the tracepoint event, with a non-matching --filter
expression, the overhead is much larger. Performance drops to 8.3 Mpps,
a cost of 48 nanosec ((1/13782002-1/8312497)*10^9 = -47.74 ns).

Having a separate tracepoint for err cases, which should be less
frequent, allows running a continuous monitor for errors while not
affecting the redirect forward performance (this has also been
verified by measurements).
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 b06337df
......@@ -94,11 +94,25 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);
#define _trace_xdp_redirect(dev, xdp, to, err) \
trace_xdp_redirect(dev, xdp, to, err, NULL, 0);
/*
 * Error-path counterpart of the xdp_redirect event: a second event
 * instantiated from the xdp_redirect_template class, taking the same
 * TP_ARGS list (dev, xdp, to_ifindex, err, map, map_index).
 * Reporting failures through their own tracepoint lets an error monitor
 * stay attached without adding overhead to the successful forward path.
 */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
TP_PROTO(const struct net_device *dev,
const struct bpf_prog *xdp,
int to_ifindex, int err,
const struct bpf_map *map, u32 map_index),
TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);
/*
 * Wrappers for redirects that do not go through a map: both pass a NULL
 * map and a map index of 0.
 *  - _trace_xdp_redirect():     success case, err is hard-coded to 0.
 *  - _trace_xdp_redirect_err(): forwards the caller's err to the
 *                               xdp_redirect_err event.
 * NOTE(review): each expansion already ends in ';', so a call site that
 * appends its own ';' yields an extra empty statement.
 */
#define _trace_xdp_redirect(dev, xdp, to) \
trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);
/*
 * Success-path wrapper for map-based redirects: emits xdp_redirect with
 * fwd's ifindex (0 when fwd is NULL) and err fixed at 0.
 * NOTE(review): fwd is expanded twice and unparenthesized, so pass a
 * plain pointer variable rather than an expression with side effects.
 */
#define trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, 0, map, idx);
#define trace_xdp_redirect_map(dev, xdp, fwd, err, map, idx) \
trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx);
/*
 * Error-path wrapper for map-based redirects: emits xdp_redirect_err with
 * fwd's ifindex (0 when fwd is NULL) and the caller's err.
 * Note the argument order: this macro takes err last, after map and idx,
 * whereas the underlying event takes err before map.
 */
#define trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
trace_xdp_redirect_err(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx);
#endif /* _TRACE_XDP_H */
......
......@@ -2515,16 +2515,20 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
fwd = __dev_map_lookup_elem(map, index);
if (!fwd) {
err = -EINVAL;
goto out;
goto err;
}
if (ri->map_to_flush && ri->map_to_flush != map)
xdp_do_flush_map();
err = __bpf_tx_xdp(fwd, map, xdp, index);
if (likely(!err))
ri->map_to_flush = map;
out:
trace_xdp_redirect_map(dev, xdp_prog, fwd, err, map, index);
if (unlikely(err))
goto err;
ri->map_to_flush = map;
trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}
......@@ -2543,12 +2547,17 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
ri->ifindex = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto out;
goto err;
}
err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
out:
_trace_xdp_redirect(dev, xdp_prog, index, err);
if (unlikely(err))
goto err;
_trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
......@@ -2566,23 +2575,25 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
ri->ifindex = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto out;
goto err;
}
if (unlikely(!(fwd->flags & IFF_UP))) {
err = -ENETDOWN;
goto out;
goto err;
}
len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
if (skb->len > len) {
err = -EMSGSIZE;
goto out;
goto err;
}
skb->dev = fwd;
out:
_trace_xdp_redirect(dev, xdp_prog, index, err);
_trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册