Commit 90fed9c9 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-05-24

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc).

2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers.

3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.

4) Jiong Wang adds support for indirect and arithmetic shifts to the NFP JIT.

5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.

6) Mathieu Xhonneux adds an End.BPF action to seg6local, with BPF helpers that allow
   editing/growing/shrinking an SRH and applying generic SRv6 actions to a packet.

7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT.

8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events.

9) Other miscellaneous fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
...@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
...@@ -712,11 +724,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			break;

 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
+
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;

 			bpf_jit_emit_func_call(image, ctx, (u64)func);
...@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, ...@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
return 0; return 0;
} }
struct powerpc64_jit_data {
struct bpf_binary_header *header;
u32 *addrs;
u8 *image;
u32 proglen;
struct codegen_context ctx;
};
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{ {
u32 proglen; u32 proglen;
...@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL; u8 *image = NULL;
u32 *code_base; u32 *code_base;
u32 *addrs; u32 *addrs;
struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx; struct codegen_context cgctx;
int pass; int pass;
int flen; int flen;
...@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp; struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp; struct bpf_prog *tmp_fp;
bool bpf_blinded = false; bool bpf_blinded = false;
bool extra_pass = false;
if (!fp->jit_requested) if (!fp->jit_requested)
return org_fp; return org_fp;
...@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp; fp = tmp_fp;
} }
jit_data = fp->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
fp = org_fp;
goto out;
}
fp->aux->jit_data = jit_data;
}
flen = fp->len; flen = fp->len;
addrs = jit_data->addrs;
if (addrs) {
cgctx = jit_data->ctx;
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) { if (addrs == NULL) {
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
memset(&cgctx, 0, sizeof(struct codegen_context)); memset(&cgctx, 0, sizeof(struct codegen_context));
...@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16); cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */ /* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */ /* We hit something illegal or unsupported. */
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
/* /*
...@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns); bpf_jit_fill_ill_insns);
if (!bpf_hdr) { if (!bpf_hdr) {
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */ /* Code generation passes 1-2 */
...@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */ /* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0; cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx); bpf_jit_build_prologue(code_base, &cgctx);
bpf_jit_build_body(fp, code_base, &cgctx, addrs); bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx); bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1) if (bpf_jit_enable > 1)
...@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen; fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
} else {
jit_data->addrs = addrs;
jit_data->ctx = cgctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = bpf_hdr;
}
out: out:
kfree(addrs);
if (bpf_blinded) if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
......
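The NOP padding in bpf_jit_emit_func_call() above is what keeps the emitted offsets stable: the normal passes see func == NULL while the extra pass patches in the real subprog address, so the call sequence must occupy the same number of instruction slots both times. A minimal sketch of that idea, outside the kernel and with a hypothetical emit_imm() callback standing in for PPC_LI64():

#include <stdint.h>

#define NOP_INSN	0x60000000u	/* ppc "ori 0,0,0" */
#define MAX_IMM_INSNS	5		/* worst case for loading a 64-bit immediate */

/* Emit an immediate load padded to a fixed number of 32-bit slots so a
 * later pass can patch in a different value without shifting the offsets
 * of any following instruction.
 */
static unsigned int emit_imm64_padded(uint32_t *image, unsigned int idx, uint64_t imm,
				      unsigned int (*emit_imm)(uint32_t *image,
							       unsigned int idx,
							       uint64_t imm))
{
	unsigned int start = idx;

	idx = emit_imm(image, idx, imm);	/* may emit 1..MAX_IMM_INSNS insns */
	while (idx - start < MAX_IMM_INSNS)	/* pad so the length is constant */
		image[idx++] = NOP_INSN;
	return idx;
}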
...@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @dev: netdev
  * @xdp: XDP buffer
  *
- * Returns Zero if sent, else an error code
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
-	int err;
+	int drops = 0;
+	int i;

 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
...@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;

-	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
-	if (err != I40E_XDP_TX)
-		return -ENOSPC;
-	return 0;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+		if (err != I40E_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }

 /**
......
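The i40e conversion above follows the new bulk ndo_xdp_xmit contract: the driver frees the frames it drops and reports how many it sent, while a negative return means nothing was consumed and the caller still owns every frame. A rough caller-side sketch of that contract (illustrative only, not the devmap/redirect code; the helper name is made up):

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Flush a bulk of collected frames to one device and honour the
 * ndo_xdp_xmit return convention described above.
 */
static int flush_bulk(struct net_device *dev, struct xdp_frame **frames, int n)
{
	int sent, i;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames);
	if (sent < 0) {
		/* Driver touched nothing: caller owns and must free all frames. */
		for (i = 0; i < n; i++)
			xdp_return_frame_rx_napi(frames[i]);
		return sent;
	}

	/* 0 <= sent <= n: the driver already freed the (n - sent) drops. */
	return n - sent;	/* number of drops, e.g. for stats/tracing */
}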
...@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); ...@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi); void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb); bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev); void i40e_xdp_flush(struct net_device *dev);
/** /**
......
...@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) ...@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
} }
} }
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
{ {
struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring; struct ixgbe_ring *ring;
int err; int drops = 0;
int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN; return -ENETDOWN;
...@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) ...@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring)) if (unlikely(!ring))
return -ENXIO; return -ENXIO;
err = ixgbe_xmit_xdp_ring(adapter, xdpf); for (i = 0; i < n; i++) {
if (err != IXGBE_XDP_TX) struct xdp_frame *xdpf = frames[i];
return -ENOSPC; int err;
return 0; err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static void ixgbe_xdp_flush(struct net_device *dev) static void ixgbe_xdp_flush(struct net_device *dev)
......
...@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) ...@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
} }
static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
bool set, bool src_lmextn)
{
u16 addr_lo, addr_hi;
u64 insn;
addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
addr_hi = addr != addr_lo;
insn = OP_BR_BIT_BASE |
FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
FIELD_PREP(OP_BR_BIT_BV, set) |
FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
u8 defer, bool set, enum nfp_relo_type relo)
{
struct nfp_insn_re_regs reg;
int err;
/* NOTE: The bit to test is specified as an rotation amount, such that
* the bit to test will be placed on the MSB of the result when
* doing a rotate right. For bit X, we need right rotate X + 1.
*/
bit += 1;
err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
if (err) {
nfp_prog->error = err;
return;
}
__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
reg.src_lmextn);
nfp_prog->prog[nfp_prog->prog_len - 1] |=
FIELD_PREP(OP_RELO_TYPE, relo);
}
static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}
static void static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
enum immed_width width, bool invert, enum immed_width width, bool invert,
...@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst, ...@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
reg.dst_lmextn, reg.src_lmextn); reg.dst_lmextn, reg.src_lmextn);
} }
static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
if (sc == SHF_SC_R_ROT) {
pr_err("indirect shift is not allowed on rotation\n");
nfp_prog->error = -EFAULT;
return;
}
emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}
static void static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
...@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0; return 0;
} }
/* Pseudo code:
* if shift_amt >= 32
* dst_high = dst_low << shift_amt[4:0]
* dst_low = 0;
* else
* dst_high = (dst_high, dst_low) >> (32 - shift_amt)
* dst_low = dst_low << shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
32 - shift_amt);
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst + 1, dst);
wrp_immed(nfp_prog, reg_both(dst), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
return 0;
}
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{ {
const struct bpf_insn *insn = &meta->insn; const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2; u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) { return __shl_imm64(nfp_prog, dst, insn->imm);
emit_shf(nfp_prog, reg_both(dst + 1), }
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, 32 - insn->imm); static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
emit_shf(nfp_prog, reg_both(dst), {
reg_none(), SHF_OP_NONE, reg_b(dst), emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
SHF_SC_L_SHF, insn->imm); reg_b(src));
} else if (insn->imm == 32) { emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
wrp_reg_mov(nfp_prog, dst + 1, dst); emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
wrp_immed(nfp_prog, reg_both(dst), 0); reg_b(dst), SHF_SC_R_DSHF);
} else if (insn->imm > 32) { }
emit_shf(nfp_prog, reg_both(dst + 1),
reg_none(), SHF_OP_NONE, reg_b(dst), /* NOTE: for indirect left shift, HIGH part should be calculated first. */
SHF_SC_L_SHF, insn->imm - 32); static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
wrp_immed(nfp_prog, reg_both(dst), 0); {
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
}
static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shl_reg64_lt32_high(nfp_prog, dst, src);
shl_reg64_lt32_low(nfp_prog, dst, src);
}
static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shl_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shl_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shl_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shl_reg64_lt32_high(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shl_reg64_lt32_low packed in delay slot. */
shl_reg64_lt32_low(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shl_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Pseudo code:
* if shift_amt >= 32
* dst_high = 0;
* dst_low = dst_high >> shift_amt[4:0]
* else
* dst_high = dst_high >> shift_amt
* dst_low = (dst_high, dst_low) >> shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} }
return 0; return 0;
...@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn; const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2; u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) { return __shr_imm64(nfp_prog, dst, insn->imm);
emit_shf(nfp_prog, reg_both(dst), }
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, insn->imm); /* NOTE: for indirect right shift, LOW part should be calculated first. */
emit_shf(nfp_prog, reg_both(dst + 1), static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
reg_none(), SHF_OP_NONE, reg_b(dst + 1), {
SHF_SC_R_SHF, insn->imm); emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
} else if (insn->imm == 32) { emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF);
}
static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shr_reg64_lt32_low(nfp_prog, dst, src);
shr_reg64_lt32_high(nfp_prog, dst, src);
}
static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shr_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shr_reg64_lt32_high packed in delay slot. */
shr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
* told through PREV_ALU result.
*/
static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
/* Set signedness bit. */
emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
reg_imm(0));
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
/* NOTE: this also helps setting signedness bit. */
wrp_reg_mov(nfp_prog, dst, dst + 1); wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0); emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
} else if (insn->imm > 32) { reg_b(dst + 1), SHF_SC_R_SHF, 31);
emit_shf(nfp_prog, reg_both(dst), } else if (shift_amt > 32) {
reg_none(), SHF_OP_NONE, reg_b(dst + 1), emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
SHF_SC_R_SHF, insn->imm - 32); reg_imm(0));
wrp_immed(nfp_prog, reg_both(dst + 1), 0); emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
return 0;
}
static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __ashr_imm64(nfp_prog, dst, insn->imm);
}
static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: the first insn will set both indirect shift amount (source A)
* and signedness bit (MSB of result).
*/
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: it is the same as logic shift because we don't need to shift in
* signedness bit when the shift amount is less than 32.
*/
return shr_reg64_lt32_low(nfp_prog, dst, src);
}
static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
ashr_reg64_lt32_low(nfp_prog, dst, src);
ashr_reg64_lt32_high(nfp_prog, dst, src);
}
static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
/* Like ashr_imm64, but need to use indirect shift. */
static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __ashr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
ashr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
ashr_reg64_ge32(nfp_prog, dst, src);
} else {
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
ashr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* ashr_reg64_lt32_high packed in delay slot. */
ashr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
ashr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
} }
return 0; return 0;
...@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = { ...@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
[BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
[BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_NEG] = neg_reg64,
[BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
[BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
[BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
[BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
[BPF_ALU | BPF_MOV | BPF_X] = mov_reg, [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
[BPF_ALU | BPF_MOV | BPF_K] = mov_imm, [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
[BPF_ALU | BPF_XOR | BPF_X] = xor_reg, [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
......
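The shift code above follows the pseudo code in the comments: a 64-bit BPF register lives in a pair of 32-bit NFP registers, and the < 32, == 32 and > 32 cases need different sequences, with the indirect (register) variants choosing between them at runtime via the branch-on-bit-set test on bit 5 of the shift amount. A plain C model of the left-shift decomposition, purely illustrative, using two u32 halves in place of NFP registers:

#include <stdint.h>

/* Model of a 64-bit left shift over 32-bit halves (hi:lo), mirroring the
 * shl pseudo code: high half first (funnel/double shift), then low half.
 */
static void shl64_halves(uint32_t *lo, uint32_t *hi, unsigned int amt)
{
	if (amt == 0)
		return;		/* avoid the undefined lo >> 32 below */

	if (amt < 32) {
		*hi = (*hi << amt) | (*lo >> (32 - amt));	/* double-shift */
		*lo <<= amt;
	} else if (amt == 32) {
		*hi = *lo;
		*lo = 0;
	} else {		/* 32 < amt < 64 */
		*hi = *lo << (amt - 32);
		*lo = 0;
	}
}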
...@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state { ...@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
* @func_id: function id for call instructions * @func_id: function id for call instructions
* @arg1: arg1 for call instructions * @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions * @arg2: arg2 for call instructions
* @umin: copy of core verifier umin_value.
* @umax: copy of core verifier umax_value.
* @off: index of first generated machine instruction (in nfp_prog.prog) * @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number * @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags * @flags: eBPF instruction extra optimization flags
...@@ -298,6 +300,13 @@ struct nfp_insn_meta { ...@@ -298,6 +300,13 @@ struct nfp_insn_meta {
struct bpf_reg_state arg1; struct bpf_reg_state arg1;
struct nfp_bpf_reg_state arg2; struct nfp_bpf_reg_state arg2;
}; };
/* We are interested in range info for some operands,
* for example, the shift amount.
*/
struct {
u64 umin;
u64 umax;
};
}; };
unsigned int off; unsigned int off;
unsigned short n; unsigned short n;
...@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) ...@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
} }
static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
{
u8 code = meta->insn.code;
bool is_alu, is_shift;
u8 opclass, opcode;
opclass = BPF_CLASS(code);
is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
if (!is_alu)
return false;
opcode = BPF_OP(code);
is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
if (!is_shift)
return false;
return BPF_SRC(code) == BPF_X;
}
/** /**
* struct nfp_prog - nfp BPF program * struct nfp_prog - nfp BPF program
* @bpf: backpointer to the bpf app priv structure * @bpf: backpointer to the bpf app priv structure
......
...@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, ...@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
meta->insn = prog[i]; meta->insn = prog[i];
meta->n = i; meta->n = i;
if (is_mbpf_indir_shift(meta))
meta->umin = U64_MAX;
list_add_tail(&meta->l, &nfp_prog->insns); list_add_tail(&meta->l, &nfp_prog->insns);
} }
......
...@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) ...@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (is_mbpf_xadd(meta)) if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env); return nfp_bpf_check_xadd(nfp_prog, meta, env);
if (is_mbpf_indir_shift(meta)) {
const struct bpf_reg_state *sreg =
cur_regs(env) + meta->insn.src_reg;
meta->umin = min(meta->umin, sreg->umin_value);
meta->umax = max(meta->umax, sreg->umax_value);
}
return 0; return 0;
} }
......
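The verifier hook above records the unsigned range of the shift-amount register so the JIT can pick between the immediate sequences (umin == umax) and the runtime-dispatched indirect sequences. Since the same instruction can be reached along several verifier paths, the recorded bounds have to be widened on every visit, starting from umin = U64_MAX (set in nfp_prog_prepare()) and umax = 0 (assumed here to come from the zeroed meta allocation). A small sketch of that widening step, illustrative rather than the driver code:

#include <stdint.h>

struct shift_bounds {
	uint64_t umin;	/* start at UINT64_MAX so the first visit narrows it */
	uint64_t umax;	/* start at 0 so the first visit raises it */
};

/* Fold one verifier path's range for the shift amount into the bounds
 * kept for this instruction.
 */
static void record_shift_bounds(struct shift_bounds *b,
				uint64_t path_umin, uint64_t path_umax)
{
	if (path_umin < b->umin)
		b->umin = path_umin;
	if (path_umax > b->umax)
		b->umax = path_umax;
}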
...@@ -72,8 +72,21 @@ ...@@ -72,8 +72,21 @@
#define OP_BR_ADDR_LO 0x007ffc00000ULL #define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL #define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \ #define OP_BR_BIT_BASE 0x0d000000000ULL
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) #define OP_BR_BIT_BASE_MASK 0x0f800080300ULL
#define OP_BR_BIT_A_SRC 0x000000000ffULL
#define OP_BR_BIT_B_SRC 0x0000003fc00ULL
#define OP_BR_BIT_BV 0x00000040000ULL
#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL
#define OP_BR_BIT_DEFBR OP_BR_DEFBR
#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO
#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI
static inline bool nfp_is_br(u64 insn)
{
return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
(insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
}
enum br_mask { enum br_mask {
BR_BEQ = 0x00, BR_BEQ = 0x00,
...@@ -161,6 +174,7 @@ enum shf_op { ...@@ -161,6 +174,7 @@ enum shf_op {
SHF_OP_NONE = 0, SHF_OP_NONE = 0,
SHF_OP_AND = 2, SHF_OP_AND = 2,
SHF_OP_OR = 5, SHF_OP_OR = 5,
SHF_OP_ASHR = 6,
}; };
enum shf_sc { enum shf_sc {
......
...@@ -70,6 +70,7 @@ ...@@ -70,6 +70,7 @@
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/skb_array.h> #include <linux/skb_array.h>
...@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = { ...@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64, .ndo_get_stats64 = tun_net_get_stats64,
}; };
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{ {
struct tun_struct *tun = netdev_priv(dev); struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile; struct tun_file *tfile;
u32 numqueues; u32 numqueues;
int ret = 0; int drops = 0;
int cnt = n;
int i;
rcu_read_lock(); rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues); numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) { if (!numqueues) {
ret = -ENOSPC; rcu_read_unlock();
goto out; return -ENXIO; /* Caller will free/return all frames */
} }
tfile = rcu_dereference(tun->tfiles[smp_processor_id() % tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]); numqueues]);
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff. spin_lock(&tfile->tx_ring.producer_lock);
*/ for (i = 0; i < n; i++) {
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) { struct xdp_frame *xdp = frames[i];
this_cpu_inc(tun->pcpu_stats->tx_dropped); /* Encode the XDP flag into lowest bit for consumer to differ
ret = -ENOSPC; * XDP buffer from sk_buff.
*/
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
xdp_return_frame_rx_napi(xdp);
drops++;
}
} }
spin_unlock(&tfile->tx_ring.producer_lock);
out:
rcu_read_unlock(); rcu_read_unlock();
return ret; return cnt - drops;
} }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
...@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) ...@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame)) if (unlikely(!frame))
return -EOVERFLOW; return -EOVERFLOW;
return tun_xdp_xmit(dev, frame); return tun_xdp_xmit(dev, 1, &frame);
} }
static void tun_xdp_flush(struct net_device *dev) static void tun_xdp_flush(struct net_device *dev)
......
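The tun path above stores XDP frames and sk_buffs in the same ptr_ring, so the producer tags XDP pointers in their lowest bit (the "Encode the XDP flag into lowest bit" comment) and the consumer untags them before use. A hedged sketch of that tagging scheme; the flag value and helper names here are illustrative, with tun_xdp_to_ptr() being the real producer-side helper shown in the hunk:

#include <net/xdp.h>

#define XDP_PTR_FLAG	0x1UL	/* relies on xdp_frame pointers being at least 2-byte aligned */

static void *xdp_to_ptr(struct xdp_frame *xdpf)
{
	return (void *)((unsigned long)xdpf | XDP_PTR_FLAG);
}

static bool ptr_is_xdp(void *ptr)
{
	return (unsigned long)ptr & XDP_PTR_FLAG;
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~XDP_PTR_FLAG);
}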
...@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev) ...@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq); virtqueue_kick(sq->vq);
} }
static int __virtnet_xdp_xmit(struct virtnet_info *vi, static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct xdp_frame *xdpf) struct send_queue *sq,
struct xdp_frame *xdpf)
{ {
struct virtio_net_hdr_mrg_rxbuf *hdr; struct virtio_net_hdr_mrg_rxbuf *hdr;
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int err; int err;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
/* virtqueue want to use data area in-front of packet */ /* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0)) if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi, ...@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0; return 0;
} }
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}
static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
{ {
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq; struct receive_queue *rq = vi->rq;
struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int drops = 0;
int err;
int i;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated. * indicate XDP resources have been successfully allocated.
...@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) ...@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog) if (!xdp_prog)
return -ENXIO; return -ENXIO;
return __virtnet_xdp_xmit(vi, xdpf); /* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
if (err) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi) static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
...@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev, ...@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp; goto err_xdp;
...@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, ...@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page)) if (unlikely(xdp_page != page))
......
...@@ -69,8 +69,8 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
-	u32 btf_key_id;
-	u32 btf_value_id;
+	u32 btf_key_type_id;
+	u32 btf_value_type_id;
 	struct btf *btf;
 	bool unpriv_array;
 	/* 55 bytes hole */
...@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, ...@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags); int bpf_get_file_flag(int flags);
int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters. * forced to use 'long' read/writes to try to atomically copy long counters.
...@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); ...@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */ /* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct xdp_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map); void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map); void __cpu_map_flush(struct bpf_map *map);
struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx); struct net_device *dev_rx);
...@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map) ...@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
struct bpf_dtab_netdev;
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}
static inline static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{ {
...@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map) ...@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp, struct xdp_buff *xdp,
struct net_device *dev_rx) struct net_device *dev_rx)
......
...@@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
......
...@@ -517,6 +517,7 @@ struct sk_msg_buff { ...@@ -517,6 +517,7 @@ struct sk_msg_buff {
bool sg_copy[MAX_SKB_FRAGS]; bool sg_copy[MAX_SKB_FRAGS];
__u32 flags; __u32 flags;
struct sock *sk_redir; struct sock *sk_redir;
struct sock *sk;
struct sk_buff *skb; struct sk_buff *skb;
struct list_head list; struct list_head list;
}; };
......
...@@ -1185,9 +1185,13 @@ struct dev_ifalias {
  * This function is used to set or query state related to XDP on the
  * netdevice and manage BPF offload. See definition of
  * enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns number of frames successfully transmitted, frames
+ * that got dropped are freed/returned via xdp_return_frame().
+ * Returns negative number, means general error invoking ndo, meaning
+ * no frames were xmit'ed and core-caller will free all frames.
+ * TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  * This function is used to inform the driver to flush a particular
  * xdp tx queue. Must be called on same CPU as xdp_xmit.
...@@ -1375,8 +1379,8 @@ struct net_device_ops {
 						       int needed_headroom);
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
-	int			(*ndo_xdp_xmit)(struct net_device *dev,
-						struct xdp_frame *xdp);
+	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
+						struct xdp_frame **xdp);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
......
...@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child); ...@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task); extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd); extern struct file *perf_event_get(unsigned int fd);
extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void); extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_disable(struct pmu *pmu);
...@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { } ...@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
static inline const struct perf_event *perf_get_event(struct file *file)
{
return ERR_PTR(-EINVAL);
}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
......
...@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info); ...@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr);
#else #else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{ {
...@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name ...@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{ {
return NULL; return NULL;
} }
static inline int bpf_get_perf_event_info(const struct perf_event *event,
u32 *prog_id, u32 *fd_type,
const char **buf, u64 *probe_offset,
u64 *probe_addr)
{
return -EOPNOTSUPP;
}
#endif #endif
enum { enum {
...@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags); ...@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS #ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event); extern void perf_kprobe_destroy(struct perf_event *event);
extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
bool perf_type_tracepoint);
#endif #endif
#ifdef CONFIG_UPROBE_EVENTS #ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event); extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
u64 *probe_offset, bool perf_type_tracepoint);
#endif #endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str); char *filter_str);
......
...@@ -236,6 +236,8 @@ struct ipv6_stub { ...@@ -236,6 +236,8 @@ struct ipv6_stub {
struct flowi6 *fl6, int oif, struct flowi6 *fl6, int oif,
const struct sk_buff *skb, const struct sk_buff *skb,
int strict); int strict);
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
void (*udpv6_encap_enable)(void); void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
......
...@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) ...@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
return f6i->fib6_nh.nh_dev; return f6i->fib6_nh.nh_dev;
} }
static inline
struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
{
return f6i->fib6_nh.nh_lwtstate;
}
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags); unsigned int flags);
......
...@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) ...@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu; return mtu;
} }
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb, struct net_device *dev, struct sk_buff *skb,
const void *daddr); const void *daddr);
......
...@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net) ...@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
} }
#endif #endif
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
#endif /* _NET_FIB_H */ #endif /* _NET_FIB_H */
...@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct);

-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+				      struct page *page, bool allow_direct)
 {
 	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 	 */
 #ifdef CONFIG_PAGE_POOL
-	__page_pool_put_page(pool, page, false);
+	__page_pool_put_page(pool, page, allow_direct);
 #endif
 }

 /* Very limited use-cases allow recycle direct */
......
...@@ -49,7 +49,11 @@ struct seg6_pernet_data { ...@@ -49,7 +49,11 @@ struct seg6_pernet_data {
static inline struct seg6_pernet_data *seg6_pernet(struct net *net) static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
{ {
#if IS_ENABLED(CONFIG_IPV6)
return net->ipv6.seg6_data; return net->ipv6.seg6_data;
#else
return NULL;
#endif
} }
extern int seg6_init(void); extern int seg6_init(void);
...@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); ...@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto); int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
#endif #endif
/*
* SR-IPv6 implementation
*
* Authors:
* David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _NET_SEG6_LOCAL_H
#define _NET_SEG6_LOCAL_H
#include <linux/percpu.h>
#include <linux/net.h>
#include <linux/ipv6.h>
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
struct seg6_bpf_srh_state {
bool valid;
u16 hdrlen;
};
DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
#endif
...@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) ...@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
} }
void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp); void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
......
-/* SPDX-License-Identifier: GPL-2.0
- * AF_XDP internal functions
+/* SPDX-License-Identifier: GPL-2.0 */
+/* AF_XDP internal functions
  * Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */

 #ifndef _LINUX_XDP_SOCK_H
......
...@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, ...@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index) __entry->map_id, __entry->map_index)
); );
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
 	 (!map ? 0 :						\
 	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct net_device *)fwd)->ifindex : 0)))
+	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
...@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue, ...@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu) __entry->to_cpu)
); );
TRACE_EVENT(xdp_devmap_xmit,
TP_PROTO(const struct bpf_map *map, u32 map_index,
int sent, int drops,
const struct net_device *from_dev,
const struct net_device *to_dev, int err),
TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
TP_STRUCT__entry(
__field(int, map_id)
__field(u32, act)
__field(u32, map_index)
__field(int, drops)
__field(int, sent)
__field(int, from_ifindex)
__field(int, to_ifindex)
__field(int, err)
),
TP_fast_assign(
__entry->map_id = map->id;
__entry->act = XDP_REDIRECT;
__entry->map_index = map_index;
__entry->drops = drops;
__entry->sent = sent;
__entry->from_ifindex = from_dev->ifindex;
__entry->to_ifindex = to_dev->ifindex;
__entry->err = err;
),
TP_printk("ndo_xdp_xmit"
" map_id=%d map_index=%d action=%s"
" sent=%d drops=%d"
" from_ifindex=%d to_ifindex=%d err=%d",
__entry->map_id, __entry->map_index,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->sent, __entry->drops,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
#endif /* _TRACE_XDP_H */ #endif /* _TRACE_XDP_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>
...@@ -97,6 +97,7 @@ enum bpf_cmd { ...@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN, BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD, BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID, BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
}; };
enum bpf_map_type { enum bpf_map_type {
...@@ -141,6 +142,7 @@ enum bpf_prog_type { ...@@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -284,8 +286,8 @@ union bpf_attr { ...@@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN]; char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */ __u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */ __u32 btf_fd; /* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
}; };
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
...@@ -379,6 +381,22 @@ union bpf_attr { ...@@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size; __u32 btf_log_size;
__u32 btf_log_level; __u32 btf_log_level;
}; };
struct {
__u32 pid; /* input: pid */
__u32 fd; /* input: fd */
__u32 flags; /* input: flags */
__u32 buf_len; /* input/output: buf len */
__aligned_u64 buf; /* input/output:
* tp_name for tracepoint
* symbol for kprobe
* filename for uprobe
*/
		__u32 prog_id;		/* output: prog_id */
__u32 fd_type; /* output: BPF_FD_TYPE_* */
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
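As an illustration only (not part of this patch), the new command is driven from user space roughly as follows; the buffer size, error handling and wrapper name are arbitrary:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int query_task_fd(pid_t pid, int fd, char *buf, __u32 buf_len)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.task_fd_query.pid = pid;
		attr.task_fd_query.fd = fd;
		attr.task_fd_query.buf = (__u64)(unsigned long)buf;
		attr.task_fd_query.buf_len = buf_len;

		/* On success the kernel fills in prog_id, fd_type, probe_offset
		 * and probe_addr, and copies the tp/kprobe/uprobe name into buf.
		 */
		if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
			return -1;

		printf("prog_id=%u fd_type=%u name=%s\n",
		       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf);
		return 0;
	}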
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF /* The description below is an attempt at providing documentation to eBPF
...@@ -1902,6 +1920,90 @@ union bpf_attr { ...@@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now. * egress otherwise). This is the only flag supported for now.
* Return * Return
* **SK_PASS** on success, or **SK_DROP** on error. * **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
* address *hdr*, with *len* its size in bytes. *type* indicates
* the protocol of the header and can be one of:
*
* **BPF_LWT_ENCAP_SEG6**
* IPv6 encapsulation with Segment Routing Header
* (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
* the IPv6 header is computed by the kernel.
* **BPF_LWT_ENCAP_SEG6_INLINE**
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
* inside the outermost IPv6 Segment Routing Header can be
* modified through this helper.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
* *skb*, at position *offset* by *delta* bytes. Only offsets
 *		after the segments are accepted. *delta* can be either
 *		positive (growing) or negative (shrinking).
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
* contained at address *param*, and of length *param_len* bytes.
* *action* can be one of:
*
* **SEG6_LOCAL_ACTION_END_X**
* End.X action: Endpoint with Layer-3 cross-connect.
* Type of *param*: **struct in6_addr**.
* **SEG6_LOCAL_ACTION_END_T**
* End.T action: Endpoint with specific IPv6 table lookup.
* Type of *param*: **int**.
* **SEG6_LOCAL_ACTION_END_B6**
* End.B6 action: Endpoint bound to an SRv6 policy.
* Type of param: **struct ipv6_sr_hdr**.
* **SEG6_LOCAL_ACTION_END_B6_ENCAP**
* End.B6.Encap action: Endpoint bound to an SRv6
* encapsulation policy.
* Type of param: **struct ipv6_sr_hdr**.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*/ */
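To make the new seg6local hooks concrete, here is a minimal illustrative End.BPF program. It is not part of this patch; the include paths, section name, SEC() macro and helper declarations follow the kernel selftests' bpf_helpers.h, and the table id is an arbitrary example:

	#include <linux/bpf.h>
	#include <linux/seg6_local.h>
	#include "bpf_helpers.h"

	SEC("lwt_seg6local")
	int end_bpf_prog(struct __sk_buff *skb)
	{
		int table_id = 100;	/* example routing table for End.T */

		/* Hand the packet over to the kernel's End.T behaviour; the
		 * parameter is the table id, as documented above.
		 */
		if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
					&table_id, sizeof(table_id)) < 0)
			return BPF_DROP;

		return BPF_OK;
	}

	char _license[] SEC("license") = "GPL";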
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -1976,7 +2078,11 @@ union bpf_attr { ...@@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \ FN(fib_lookup), \
FN(sock_hash_update), \ FN(sock_hash_update), \
FN(msg_redirect_hash), \ FN(msg_redirect_hash), \
-	FN(sk_redirect_hash),
+	FN(sk_redirect_hash),		\
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off { ...@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET, BPF_HDR_START_NET,
}; };
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
};
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -2176,6 +2288,14 @@ enum sk_action { ...@@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md { struct sk_msg_md {
void *data; void *data;
void *data_end; void *data_end;
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
__u32 remote_ip6[4]; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
}; };
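For illustration (not part of this patch), an SK_MSG program can now branch on the socket's address fields; the section name and port value are arbitrary, and SEC() comes from the selftests' bpf_helpers.h:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("sk_msg")
	int msg_by_port(struct sk_msg_md *msg)
	{
		/* local_port is host byte order, the remote_* and local_ip*
		 * fields are network byte order, per the uapi comments above.
		 */
		if (msg->family == 2 /* AF_INET */ && msg->local_port == 8000)
			return SK_DROP;

		return SK_PASS;
	}

	char _license[] SEC("license") = "GPL";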
#define BPF_TAG_SIZE 8 #define BPF_TAG_SIZE 8
...@@ -2197,6 +2317,10 @@ struct bpf_prog_info { ...@@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1; __u32 gpl_compatible:1;
__u64 netns_dev; __u64 netns_dev;
__u64 netns_ino; __u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
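Not part of this patch: a rough user-space sketch of consuming the new fields via BPF_OBJ_GET_INFO_BY_FD, here through libbpf's bpf_obj_get_info_by_fd(); the include path depends on how libbpf is built/installed:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <bpf/bpf.h>

	static void dump_jited_ksyms(int prog_fd)
	{
		struct bpf_prog_info info = {};
		__u32 len = sizeof(info);
		__u64 *ksyms;
		__u32 i, nr;

		/* First call: learn how many JITed functions the program has */
		if (bpf_obj_get_info_by_fd(prog_fd, &info, &len) || !info.nr_jited_ksyms)
			return;

		nr = info.nr_jited_ksyms;
		ksyms = calloc(nr, sizeof(*ksyms));
		if (!ksyms)
			return;

		memset(&info, 0, sizeof(info));
		info.nr_jited_ksyms = nr;
		info.jited_ksyms = (__u64)(unsigned long)ksyms;
		len = sizeof(info);

		/* Second call: fetch one (page-masked) ksym address per function */
		if (!bpf_obj_get_info_by_fd(prog_fd, &info, &len))
			for (i = 0; i < nr; i++)
				printf("func %u @ 0x%llx\n", i,
				       (unsigned long long)ksyms[i]);

		free(ksyms);
	}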
struct bpf_map_info { struct bpf_map_info {
...@@ -2211,8 +2335,8 @@ struct bpf_map_info { ...@@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev; __u64 netns_dev;
__u64 netns_ino; __u64 netns_ino;
__u32 btf_id; __u32 btf_id;
-	__u32 btf_key_id;
-	__u32 btf_value_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
struct bpf_btf_info { struct bpf_btf_info {
...@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup { ...@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */
}; };
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_UPROBE, /* filename + offset */
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -12,42 +12,29 @@ struct btf_header { ...@@ -12,42 +12,29 @@ struct btf_header {
 	__u16	magic;
 	__u8	version;
 	__u8	flags;
-	__u32	parent_label;
-	__u32	parent_name;
+	__u32	hdr_len;
 
 	/* All offsets are in bytes relative to the end of this header */
-	__u32	label_off;	/* offset of label section	*/
-	__u32	object_off;	/* offset of data object section*/
-	__u32	func_off;	/* offset of function section	*/
 	__u32	type_off;	/* offset of type section	*/
+	__u32	type_len;	/* length of type section	*/
 	__u32	str_off;	/* offset of string section	*/
 	__u32	str_len;	/* length of string section	*/
 };
 /* Max # of type identifier */
-#define BTF_MAX_TYPE	0x7fffffff
+#define BTF_MAX_TYPE	0x0000ffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET	0x7fffffff
+#define BTF_MAX_NAME_OFFSET	0x0000ffff
/* Max # of struct/union/enum members or func args */ /* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff #define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type { struct btf_type {
__u32 name_off; __u32 name_off;
/* "info" bits arrangement /* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members) * bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused * bits 16-23: unused
-	 * bits 24-28: kind (e.g. int, ptr, array...etc)
-	 * bits 29-30: unused
-	 * bits 31:    root
+	 * bits 24-27: kind (e.g. int, ptr, array...etc)
+	 * bits 28-31: unused
 	 */
__u32 info; __u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION. /* "size" is used by INT, ENUM, STRUCT and UNION.
...@@ -62,8 +49,7 @@ struct btf_type { ...@@ -62,8 +49,7 @@ struct btf_type {
}; };
}; };
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
-#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_UNKN 0 /* Unknown */
...@@ -88,15 +74,14 @@ struct btf_type { ...@@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following /* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement: * is the 32 bits arrangement:
*/ */
-#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
 #define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
 
 /* Attributes stored in the BTF_INT_ENCODING */
-#define BTF_INT_SIGNED	0x1
-#define BTF_INT_CHAR	0x2
-#define BTF_INT_BOOL	0x4
-#define BTF_INT_VARARGS	0x8
+#define BTF_INT_SIGNED	(1 << 0)
+#define BTF_INT_CHAR	(1 << 1)
+#define BTF_INT_BOOL	(1 << 2)
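A worked example, illustrative only and assuming the freshly exported uapi header: with the tightened masks above, an int_data word of 0x01000020 describes a plain signed 32-bit integer:

	#include <stdio.h>
	#include <linux/btf.h>

	int main(void)
	{
		unsigned int int_data = 0x01000020;	/* example descriptor word */

		/* (0x01000020 & 0x0f000000) >> 24 == 1, i.e. BTF_INT_SIGNED */
		printf("encoding=%u offset=%u nr_bits=%u\n",
		       BTF_INT_ENCODING(int_data),
		       BTF_INT_OFFSET(int_data),	/* 0: not a bit-field */
		       BTF_INT_BITS(int_data));		/* 32 bits wide */
		return 0;
	}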
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". /* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the * The exact number of btf_enum is stored in the vlen (of the
......
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
- *
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
  * if_xdp: XDP socket user-space interface
  * Copyright(c) 2018 Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * Author(s): Björn Töpel <bjorn.topel@intel.com>
  *	      Magnus Karlsson <magnus.karlsson@intel.com>
  */
...@@ -26,19 +17,33 @@ ...@@ -26,19 +17,33 @@
 struct sockaddr_xdp {
 	__u16 sxdp_family;
+	__u16 sxdp_flags;
 	__u32 sxdp_ifindex;
 	__u32 sxdp_queue_id;
 	__u32 sxdp_shared_umem_fd;
-	__u16 sxdp_flags;
 };
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
};
struct xdp_mmap_offsets {
struct xdp_ring_offset rx;
struct xdp_ring_offset tx;
struct xdp_ring_offset fr; /* Fill */
struct xdp_ring_offset cr; /* Completion */
}; };
 /* XDP socket options */
-#define XDP_RX_RING			1
-#define XDP_TX_RING			2
-#define XDP_UMEM_REG			3
-#define XDP_UMEM_FILL_RING		4
-#define XDP_UMEM_COMPLETION_RING	5
-#define XDP_STATISTICS			6
+#define XDP_MMAP_OFFSETS		1
+#define XDP_RX_RING			2
+#define XDP_TX_RING			3
+#define XDP_UMEM_REG			4
+#define XDP_UMEM_FILL_RING		5
+#define XDP_UMEM_COMPLETION_RING	6
+#define XDP_STATISTICS			7
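A user-space sketch (not part of this patch) of the new contract: ring geometry is queried with XDP_MMAP_OFFSETS and the returned offsets locate the producer/consumer/descriptor fields inside the mmap()'ed area. The descriptor count is caller-chosen and the SOL_XDP fallback define mirrors what the xdpsock sample does:

	#include <stddef.h>
	#include <sys/mman.h>
	#include <sys/socket.h>
	#include <linux/if_xdp.h>

	#ifndef SOL_XDP
	#define SOL_XDP 283		/* as in the xdpsock sample */
	#endif

	static void *xsk_map_rx_ring(int xsk_fd, unsigned int ndescs,
				     struct xdp_mmap_offsets *off)
	{
		socklen_t optlen = sizeof(*off);
		void *map;

		/* Ask the kernel where the ring fields live in the mapping */
		if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
			return NULL;

		map = mmap(NULL, off->rx.desc + ndescs * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			   xsk_fd, XDP_PGOFF_RX_RING);

		return map == MAP_FAILED ? NULL : map;
	}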
struct xdp_umem_reg { struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */ __u64 addr; /* Start of packet data area */
...@@ -59,6 +64,7 @@ struct xdp_statistics { ...@@ -59,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000 #define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
/* Rx/Tx descriptor */
struct xdp_desc { struct xdp_desc {
__u32 idx; __u32 idx;
__u32 len; __u32 len;
...@@ -67,21 +73,6 @@ struct xdp_desc { ...@@ -67,21 +73,6 @@ struct xdp_desc {
__u8 padding[5]; __u8 padding[5];
}; };
-struct xdp_ring {
-	__u32 producer __attribute__((aligned(64)));
-	__u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
-	struct xdp_ring ptrs;
-	struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
-	struct xdp_ring ptrs;
-	__u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
 
 #endif /* _LINUX_IF_XDP_H */
...@@ -25,6 +25,7 @@ enum { ...@@ -25,6 +25,7 @@ enum {
SEG6_LOCAL_NH6, SEG6_LOCAL_NH6,
SEG6_LOCAL_IIF, SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF, SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
__SEG6_LOCAL_MAX, __SEG6_LOCAL_MAX,
}; };
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
...@@ -59,10 +60,21 @@ enum { ...@@ -59,10 +60,21 @@ enum {
SEG6_LOCAL_ACTION_END_AS = 13, SEG6_LOCAL_ACTION_END_AS = 13,
/* forward to SR-unaware VNF with masquerading */ /* forward to SR-unaware VNF with masquerading */
SEG6_LOCAL_ACTION_END_AM = 14, SEG6_LOCAL_ACTION_END_AM = 14,
/* custom BPF action */
SEG6_LOCAL_ACTION_END_BPF = 15,
__SEG6_LOCAL_ACTION_MAX, __SEG6_LOCAL_ACTION_MAX,
}; };
#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) #define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
enum {
SEG6_LOCAL_BPF_PROG_UNSPEC,
SEG6_LOCAL_BPF_PROG,
SEG6_LOCAL_BPF_PROG_NAME,
__SEG6_LOCAL_BPF_PROG_MAX,
};
#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
#endif #endif
...@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, ...@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
} }
seq_printf(m, "%u: ", *(u32 *)key); seq_printf(m, "%u: ", *(u32 *)key);
-	btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n"); seq_puts(m, "\n");
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h> #include <linux/bpf_verifier.h>
#include <linux/btf.h> #include <linux/btf.h>
...@@ -162,13 +163,16 @@ ...@@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \ #define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x0f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
/* 16MB for 64k structs and each has 16 members and /* 16MB for 64k structs and each has 16 members and
* a few MB spaces for the string section. * a few MB spaces for the string section.
* The hard limit is S32_MAX. * The hard limit is S32_MAX.
*/ */
#define BTF_MAX_SIZE (16 * 1024 * 1024) #define BTF_MAX_SIZE (16 * 1024 * 1024)
/* 64k. We can raise it later. The hard limit is S32_MAX. */
#define BTF_MAX_NR_TYPES 65535
#define for_each_member(i, struct_type, member) \ #define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \ for (i = 0, member = btf_type_member(struct_type); \
...@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr); ...@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock); static DEFINE_SPINLOCK(btf_idr_lock);
struct btf { struct btf {
union { void *data;
struct btf_header *hdr;
void *data;
};
struct btf_type **types; struct btf_type **types;
u32 *resolved_ids; u32 *resolved_ids;
u32 *resolved_sizes; u32 *resolved_sizes;
const char *strings; const char *strings;
void *nohdr_data; void *nohdr_data;
struct btf_header hdr;
u32 nr_types; u32 nr_types;
u32 types_size; u32 types_size;
u32 data_size; u32 data_size;
...@@ -228,6 +230,11 @@ enum resolve_mode { ...@@ -228,6 +230,11 @@ enum resolve_mode {
#define MAX_RESOLVE_DEPTH 32 #define MAX_RESOLVE_DEPTH 32
struct btf_sec_info {
u32 off;
u32 len;
};
struct btf_verifier_env { struct btf_verifier_env {
struct btf *btf; struct btf *btf;
u8 *visit_states; u8 *visit_states;
...@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding) ...@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
return "CHAR"; return "CHAR";
else if (encoding == BTF_INT_BOOL) else if (encoding == BTF_INT_BOOL)
return "BOOL"; return "BOOL";
else if (encoding == BTF_INT_VARARGS)
return "VARARGS";
else else
return "UNKN"; return "UNKN";
} }
...@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) ...@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset) static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{ {
-	return !BTF_STR_TBL_ELF_ID(offset) &&
-		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+	return BTF_STR_OFFSET_VALID(offset) &&
+		offset < btf->hdr.str_len;
 }
 
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
-	if (!BTF_STR_OFFSET(offset))
+	if (!offset)
 		return "(anon)";
-	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
-		return &btf->strings[BTF_STR_OFFSET(offset)];
+	else if (offset < btf->hdr.str_len)
+		return &btf->strings[offset];
 	else
 		return "(invalid-name-offset)";
} }
...@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) ...@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id]; return btf->types[type_id];
} }
/*
* Regular int is not a bit field and it must be either
* u8/u16/u32/u64.
*/
static bool btf_type_int_is_regular(const struct btf_type *t)
{
u16 nr_bits, nr_bytes;
u32 int_data;
int_data = btf_type_int(t);
nr_bits = BTF_INT_BITS(int_data);
nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
return false;
}
return true;
}
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...) const char *fmt, ...)
{ {
...@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, ...@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n"); __btf_verifier_log(log, "\n");
} }
-static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+static void btf_verifier_log_hdr(struct btf_verifier_env *env,
+				 u32 btf_data_size)
{ {
struct bpf_verifier_log *log = &env->log; struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf; const struct btf *btf = env->btf;
...@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env) ...@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log)) if (!bpf_verifier_log_needed(log))
return; return;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
 	__btf_verifier_log(log, "version: %u\n", hdr->version);
 	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
-	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
-	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
-	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
-	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
-	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
 	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
 	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
 	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
-	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
} }
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
...@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) ...@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
struct btf_type **new_types; struct btf_type **new_types;
u32 expand_by, new_size; u32 expand_by, new_size;
if (btf->types_size == BTF_MAX_NR_TYPES) { if (btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types"); btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG; return -E2BIG;
} }
expand_by = max_t(u32, btf->types_size >> 2, 16); expand_by = max_t(u32, btf->types_size >> 2, 16);
new_size = min_t(u32, BTF_MAX_NR_TYPES, new_size = min_t(u32, BTF_MAX_TYPE,
btf->types_size + expand_by); btf->types_size + expand_by);
new_types = kvzalloc(new_size * sizeof(*new_types), new_types = kvzalloc(new_size * sizeof(*new_types),
...@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, ...@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
} }
int_data = btf_type_int(t); int_data = btf_type_int(t);
if (int_data & ~BTF_INT_MASK) {
btf_verifier_log_basic(env, t, "Invalid int_data:%x",
int_data);
return -EINVAL;
}
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
if (nr_bits > BITS_PER_U64) { if (nr_bits > BITS_PER_U64) {
...@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, ...@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
/*
* Only one of the encoding bits is allowed and it
* should be sufficient for the pretty print purpose (i.e. decoding).
* Multiple bits can be allowed later if it is found
* to be insufficient.
*/
encoding = BTF_INT_ENCODING(int_data); encoding = BTF_INT_ENCODING(int_data);
if (encoding && if (encoding &&
encoding != BTF_INT_SIGNED && encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR && encoding != BTF_INT_CHAR &&
-	    encoding != BTF_INT_BOOL &&
-	    encoding != BTF_INT_VARARGS) {
+	    encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding"); btf_verifier_log_type(env, t, "Unsupported encoding");
return -ENOTSUPP; return -ENOTSUPP;
} }
...@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, ...@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	if (BTF_TYPE_PARENT(t->type)) {
+	if (!BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id"); btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL; return -EINVAL;
} }
...@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env, ...@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	/* We are a little forgiving on array->index_type since
-	 * the kernel is not using it.
-	 */
-	/* Array elem cannot be in type void,
-	 * so !array->type is not allowed.
+	/* Array elem type and index type cannot be in type void,
+	 * so !array->type and !array->index_type are not allowed.
 	 */
-	if (!array->type || BTF_TYPE_PARENT(array->type)) {
-		btf_verifier_log_type(env, t, "Invalid type_id");
+	if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
+		btf_verifier_log_type(env, t, "Invalid elem");
return -EINVAL;
}
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL; return -EINVAL;
} }
...@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env, ...@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v) const struct resolve_vertex *v)
{ {
 	const struct btf_array *array = btf_type_array(v->t);
-	const struct btf_type *elem_type;
-	u32 elem_type_id = array->type;
+	const struct btf_type *elem_type, *index_type;
+	u32 elem_type_id, index_type_id;
 	struct btf *btf = env->btf;
 	u32 elem_size;
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
if (btf_type_is_void_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, index_type) &&
!env_type_is_resolved(env, index_type_id))
return env_stack_push(env, index_type, index_type_id);
index_type = btf_type_id_size(btf, &index_type_id, NULL);
if (!index_type || !btf_type_is_int(index_type) ||
!btf_type_int_is_regular(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id); elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) { if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t, btf_verifier_log_type(env, v->t,
...@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env, ...@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	if (btf_type_is_int(elem_type)) {
-		int int_type_data = btf_type_int(elem_type);
-		u16 nr_bits = BTF_INT_BITS(int_type_data);
-		u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
-
-		/* Put more restriction on array of int.  The int cannot
-		 * be a bit field and it must be either u8/u16/u32/u64.
-		 */
-		if (BITS_PER_BYTE_MASKED(nr_bits) ||
-		    BTF_INT_OFFSET(int_type_data) ||
-		    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-		     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
-			btf_verifier_log_type(env, v->t,
-					      "Invalid array of int");
-			return -EINVAL;
-		}
+	if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
+		btf_verifier_log_type(env, v->t, "Invalid array of int");
+		return -EINVAL;
 	}
if (array->nelems && elem_size > U32_MAX / array->nelems) { if (array->nelems && elem_size > U32_MAX / array->nelems) {
...@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, ...@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
} }
/* A member cannot be in type void */ /* A member cannot be in type void */
-		if (!member->type || BTF_TYPE_PARENT(member->type)) {
+		if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member, btf_verifier_log_member(env, t, member,
"Invalid type_id"); "Invalid type_id");
return -EINVAL; return -EINVAL;
...@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env, ...@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
} }
meta_left -= sizeof(*t); meta_left -= sizeof(*t);
if (t->info & ~BTF_INFO_MASK) {
btf_verifier_log(env, "[%u] Invalid btf_info:%x",
env->log_type_id, t->info);
return -EINVAL;
}
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
btf_verifier_log(env, "[%u] Invalid kind:%u", btf_verifier_log(env, "[%u] Invalid kind:%u",
...@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env) ...@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr; struct btf_header *hdr;
void *cur, *end; void *cur, *end;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	cur = btf->nohdr_data + hdr->type_off;
-	end = btf->nohdr_data + hdr->str_off;
+	end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1; env->log_type_id = 1;
while (cur < end) { while (cur < end) {
...@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env) ...@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env) static int btf_parse_type_sec(struct btf_verifier_env *env)
{ {
const struct btf_header *hdr = &env->btf->hdr;
int err; int err;
/* Type section must align to 4 bytes */
if (hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Unaligned type_off");
return -EINVAL;
}
if (!hdr->type_len) {
btf_verifier_log(env, "No type found");
return -EINVAL;
}
err = btf_check_all_metas(env); err = btf_check_all_metas(env);
if (err) if (err)
return err; return err;
...@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) ...@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf; struct btf *btf = env->btf;
const char *start, *end; const char *start, *end;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off; start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len; end = start + hdr->str_len;
if (end != btf->data + btf->data_size) {
btf_verifier_log(env, "String section is not at the end");
return -EINVAL;
}
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) { start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section"); btf_verifier_log(env, "Invalid string section");
...@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) ...@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0; return 0;
} }
static int btf_parse_hdr(struct btf_verifier_env *env) static const size_t btf_sec_info_offset[] = {
offsetof(struct btf_header, type_off),
offsetof(struct btf_header, str_off),
};
static int btf_sec_info_cmp(const void *a, const void *b)
{
const struct btf_sec_info *x = a;
const struct btf_sec_info *y = b;
return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}
static int btf_check_sec_info(struct btf_verifier_env *env,
u32 btf_data_size)
{ {
struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
u32 total, expected_total, i;
 	const struct btf_header *hdr;
-	struct btf *btf = env->btf;
-	u32 meta_left;
+	const struct btf *btf;
+
+	btf = env->btf;
+	hdr = &btf->hdr;
 
-	if (btf->data_size < sizeof(*hdr)) {
+	/* Populate the secs from hdr */
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
sort(secs, ARRAY_SIZE(btf_sec_info_offset),
sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len;
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset");
return -EINVAL;
}
if (total < secs[i].off) {
/* gap */
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
if (total > secs[i].off) {
btf_verifier_log(env, "Section overlap found");
return -EINVAL;
}
if (expected_total - total < secs[i].len) {
btf_verifier_log(env,
"Total section length too long");
return -EINVAL;
}
total += secs[i].len;
}
/* There is data other than hdr and known sections */
if (expected_total != total) {
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
u32 btf_data_size)
{
const struct btf_header *hdr;
u32 hdr_len, hdr_copy;
/*
* Minimal part of the "struct btf_header" that
* contains the hdr_len.
*/
struct btf_min_header {
u16 magic;
u8 version;
u8 flags;
u32 hdr_len;
} __user *min_hdr;
struct btf *btf;
int err;
btf = env->btf;
min_hdr = btf_data;
if (btf_data_size < sizeof(*min_hdr)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
if (get_user(hdr_len, &min_hdr->hdr_len))
return -EFAULT;
if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found"); btf_verifier_log(env, "btf_header not found");
return -EINVAL; return -EINVAL;
} }
btf_verifier_log_hdr(env); err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
if (err) {
if (err == -E2BIG)
btf_verifier_log(env, "Unsupported btf_header");
return err;
}
hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
return -EFAULT;
hdr = &btf->hdr;
btf_verifier_log_hdr(env, btf_data_size);
hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) { if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic"); btf_verifier_log(env, "Invalid magic");
return -EINVAL; return -EINVAL;
...@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env) ...@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP; return -ENOTSUPP;
} }
-	meta_left = btf->data_size - sizeof(*hdr);
-	if (!meta_left) {
+	if (btf_data_size == hdr->hdr_len) {
 		btf_verifier_log(env, "No data");
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
-	    /* Type section must align to 4 bytes */
-	    hdr->type_off & (sizeof(u32) - 1)) {
-		btf_verifier_log(env, "Invalid type_off");
-		return -EINVAL;
-	}
-
-	if (meta_left < hdr->str_off ||
-	    meta_left - hdr->str_off < hdr->str_len) {
-		btf_verifier_log(env, "Invalid str_off or str_len");
-		return -EINVAL;
-	}
-
-	btf->nohdr_data = btf->hdr + 1;
+	err = btf_check_sec_info(env, btf_data_size);
+	if (err)
+		return err;
 
 	return 0;
} }
...@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM; err = -ENOMEM;
goto errout; goto errout;
} }
env->btf = btf;
err = btf_parse_hdr(env, btf_data, btf_data_size);
if (err)
goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) { if (!data) {
...@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data; btf->data = data;
btf->data_size = btf_data_size; btf->data_size = btf_data_size;
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) { if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT; err = -EFAULT;
goto errout; goto errout;
} }
env->btf = btf;
err = btf_parse_hdr(env);
if (err)
goto errout;
err = btf_parse_str_sec(env); err = btf_parse_str_sec(env);
if (err) if (err)
goto errout; goto errout;
...@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err) if (err)
goto errout; goto errout;
-	if (!err && log->level && bpf_verifier_log_full(log)) {
+	if (log->level && bpf_verifier_log_full(log)) {
 		err = -ENOSPC;
 		goto errout;
 	}
 
-	if (!err) {
-		btf_verifier_env_free(env);
-		refcount_set(&btf->refcnt, 1);
-		return btf;
-	}
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
 
 errout:
btf_verifier_env_free(env); btf_verifier_env_free(env);
......
...@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, ...@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf); err = __ptr_ring_produce(q, xdpf);
if (err) { if (err) {
drops++; drops++;
xdp_return_frame(xdpf); xdp_return_frame_rx_napi(xdpf);
} }
processed++; processed++;
} }
......
...@@ -48,15 +48,25 @@ ...@@ -48,15 +48,25 @@
* calls will fail at this point. * calls will fail at this point.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \ #define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct net_device *dev_rx;
unsigned int count;
};
struct bpf_dtab_netdev { struct bpf_dtab_netdev {
-	struct net_device *dev;
+	struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
unsigned int bit; unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) ...@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap); __set_bit(bit, bitmap);
} }
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
return 0;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
prefetch(xdpf);
}
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
if (sent < 0) {
err = sent;
sent = 0;
goto error;
}
drops = bq->count - sent;
out:
bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
	 * xmit'ed and it's our responsibility to free them all.
*/
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll() * from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled * routine is called either from busy_poll context or net_rx_action signaled
...@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) { for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev; struct net_device *netdev;
/* This is possible if the dev entry is removed by user space /* This is possible if the dev entry is removed by user space
...@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue; continue;
__clear_bit(bit, bitmap); __clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev; netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush)) if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev); netdev->netdev_ops->ndo_xdp_flush(netdev);
...@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here a dev_put wont happen until after reading the * update happens in parallel here a dev_put wont happen until after reading the
* ifindex. * ifindex.
*/ */
-struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_dtab_netdev *dev;
+	struct bpf_dtab_netdev *obj;
 
 	if (key >= map->max_entries)
 		return NULL;
 
-	dev = READ_ONCE(dtab->netdev_map[key]);
-	return dev ? dev->dev : NULL;
+	obj = READ_ONCE(dtab->netdev_map[key]);
+	return obj;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
/* Ingress dev_rx will be the same for all xdp_frame's in
	 * bulk_queue, because bq is stored per-CPU and must be flushed
	 * from the net_device driver's NAPI handler before it returns.
*/
if (!bq->dev_rx)
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx);
} }
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct net_device *dev = obj ? obj->dev : NULL;
 
 	return dev ? &dev->ifindex : NULL;
} }
...@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) ...@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{ {
if (dev->dev->netdev_ops->ndo_xdp_flush) { if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev; struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap; unsigned long *bitmap;
int cpu; int cpu;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap); __clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);
fl->netdev_ops->ndo_xdp_flush(dev->dev); fl->netdev_ops->ndo_xdp_flush(dev->dev);
} }
} }
...@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu) ...@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu); dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev); dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{ {
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns; struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev; struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key; u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value; u32 ifindex = *(u32 *)value;
...@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) { if (!ifindex) {
dev = NULL; dev = NULL;
} else { } else {
-		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-				   map->numa_node);
+		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev) if (!dev)
return -ENOMEM; return -ENOMEM;
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
dev->dev = dev_get_by_index(net, ifindex); dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) { if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev); kfree(dev);
return -EINVAL; return -EINVAL;
} }
...@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = { ...@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void) static int __init dev_map_init(void)
{ {
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier); register_netdevice_notifier(&dev_map_notifier);
return 0; return 0;
} }
......
...@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk, ...@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
} }
bpf_compute_data_pointers_sg(md); bpf_compute_data_pointers_sg(md);
md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi); rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes; psock->apply_bytes = md->apply_bytes;
...@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, ...@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct smap_psock_map_entry *e = NULL; struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock; struct smap_psock *psock;
bool new = false; bool new = false;
-	int err;
+	int err = 0;
/* 1. If sock map has BPF programs those will be inherited by the /* 1. If sock map has BPF programs those will be inherited by the
* sock being added. If the sock is already attached to BPF programs * sock being added. If the sock is already attached to BPF programs
...@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, ...@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock); write_unlock_bh(&sock->sk_callback_lock);
return err; return err;
out_free: out_free:
kfree(e);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
out_progs: out_progs:
if (parse && verdict) { if (parse && verdict) {
......
...@@ -18,7 +18,9 @@ ...@@ -18,7 +18,9 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h> #include <linux/license.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/version.h> #include <linux/version.h>
...@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = { ...@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is * copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits. * meant to be a future-proofing of bits.
*/ */
-static int check_uarg_tail_zero(void __user *uaddr,
-				size_t expected_size,
-				size_t actual_size)
+int bpf_check_uarg_tail_zero(void __user *uaddr,
+			     size_t expected_size,
+			     size_t actual_size)
{ {
unsigned char __user *addr; unsigned char __user *addr;
unsigned char __user *end; unsigned char __user *end;
...@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) ...@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0; return 0;
} }
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */ /* called via syscall */
static int map_create(union bpf_attr *attr) static int map_create(union bpf_attr *attr)
{ {
...@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr) ...@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1); atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) && if (bpf_map_support_seq_show(map) &&
(attr->btf_key_id || attr->btf_value_id)) { (attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf; struct btf *btf;
if (!attr->btf_key_id || !attr->btf_value_id) { if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL; err = -EINVAL;
goto free_map_nouncharge; goto free_map_nouncharge;
} }
...@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr) ...@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge; goto free_map_nouncharge;
} }
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
-					      attr->btf_value_id);
+		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
+					      attr->btf_value_type_id);
if (err) { if (err) {
btf_put(btf); btf_put(btf);
goto free_map_nouncharge; goto free_map_nouncharge;
} }
map->btf = btf; map->btf = btf;
-		map->btf_key_id = attr->btf_key_id;
-		map->btf_value_id = attr->btf_value_id;
+		map->btf_key_type_id = attr->btf_key_type_id;
+		map->btf_value_type_id = attr->btf_value_type_id;
} }
err = security_bpf_map_alloc(map); err = security_bpf_map_alloc(map);
...@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen; u32 ulen;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err) if (err)
return err; return err;
info_len = min_t(u32, sizeof(info), info_len); info_len = min_t(u32, sizeof(info), info_len);
...@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) { if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0; info.jited_prog_len = 0;
info.xlated_prog_len = 0; info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
goto done; goto done;
} }
...@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload. * for offload.
*/ */
ulen = info.jited_prog_len; ulen = info.jited_prog_len;
-	info.jited_prog_len = prog->jited_len;
+	if (prog->aux->func_cnt) {
u32 i;
info.jited_prog_len = 0;
for (i = 0; i < prog->aux->func_cnt; i++)
info.jited_prog_len += prog->aux->func[i]->jited_len;
} else {
info.jited_prog_len = prog->jited_len;
}
if (info.jited_prog_len && ulen) { if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) { if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns); uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen); ulen = min_t(u32, info.jited_prog_len, ulen);
-			if (copy_to_user(uinsns, prog->bpf_func, ulen))
-				return -EFAULT;
+
+			/* for multi-function programs, copy the JITed
* instructions for all the functions
*/
if (prog->aux->func_cnt) {
u32 len, free, i;
u8 *img;
free = ulen;
for (i = 0; i < prog->aux->func_cnt; i++) {
len = prog->aux->func[i]->jited_len;
len = min_t(u32, len, free);
img = (u8 *) prog->aux->func[i]->bpf_func;
if (copy_to_user(uinsns, img, len))
return -EFAULT;
uinsns += len;
free -= len;
if (!free)
break;
}
} else {
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
}
} else { } else {
info.jited_prog_insns = 0; info.jited_prog_insns = 0;
} }
} }
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt;
if (info.nr_jited_ksyms && ulen) {
if (bpf_dump_raw_ok()) {
u64 __user *user_ksyms;
ulong ksym_addr;
u32 i;
/* copy the address of the kernel symbol
* corresponding to each function
*/
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
for (i = 0; i < ulen; i++) {
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
ksym_addr &= PAGE_MASK;
if (put_user((u64) ksym_addr, &user_ksyms[i]))
return -EFAULT;
}
} else {
info.jited_ksyms = 0;
}
}
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt;
if (info.nr_jited_func_lens && ulen) {
if (bpf_dump_raw_ok()) {
u32 __user *user_lens;
u32 func_len, i;
/* copy the JITed image lengths for each function */
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
user_lens = u64_to_user_ptr(info.jited_func_lens);
for (i = 0; i < ulen; i++) {
func_len = prog->aux->func[i]->jited_len;
if (put_user(func_len, &user_lens[i]))
return -EFAULT;
}
} else {
info.jited_func_lens = 0;
}
}
done: done:
if (copy_to_user(uinfo, &info, info_len) || if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len)) put_user(info_len, &uattr->info.info_len))
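The nr_jited_ksyms and nr_jited_func_lens fields above give user space per-function visibility into multi-function programs. A hedged user-space sketch of reading them follows; it assumes a libbpf-style bpf_obj_get_info_by_fd() wrapper and a privileged caller (otherwise the kernel zeroes the JIT fields as shown earlier), and the fixed buffer sizes are purely illustrative.

/* Sketch (user space): dump per-function JIT symbol addresses and image
 * lengths for a program that uses bpf-to-bpf calls.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>              /* bpf_obj_get_info_by_fd(), assumed libbpf */

static void dump_jited_funcs(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	__u64 ksyms[64];              /* illustrative fixed sizes */
	__u32 lens[64], i;

	info.nr_jited_ksyms = 64;
	info.jited_ksyms = (__u64)(unsigned long)ksyms;
	info.nr_jited_func_lens = 64;
	info.jited_func_lens = (__u64)(unsigned long)lens;

	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
		return;

	for (i = 0; i < info.nr_jited_ksyms && i < 64; i++)
		printf("func %u: ksym 0x%llx len %u\n",
		       i, (unsigned long long)ksyms[i], lens[i]);
}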
...@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, ...@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len; u32 info_len = attr->info.info_len;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err) if (err)
return err; return err;
info_len = min_t(u32, sizeof(info), info_len); info_len = min_t(u32, sizeof(info), info_len);
...@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, ...@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) { if (map->btf) {
info.btf_id = btf_id(map->btf); info.btf_id = btf_id(map->btf);
info.btf_key_id = map->btf_key_id; info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_id = map->btf_value_id; info.btf_value_type_id = map->btf_value_type_id;
} }
if (bpf_map_is_dev_bound(map)) { if (bpf_map_is_dev_bound(map)) {
...@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, ...@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len; u32 info_len = attr->info.info_len;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err) if (err)
return err; return err;
...@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) ...@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id); return btf_get_fd_by_id(attr->btf_id);
} }
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
union bpf_attr __user *uattr,
u32 prog_id, u32 fd_type,
const char *buf, u64 probe_offset,
u64 probe_addr)
{
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
u32 len = buf ? strlen(buf) : 0, input_len;
int err = 0;
if (put_user(len, &uattr->task_fd_query.buf_len))
return -EFAULT;
input_len = attr->task_fd_query.buf_len;
if (input_len && ubuf) {
if (!len) {
/* nothing to copy, just make ubuf NULL terminated */
char zero = '\0';
if (put_user(zero, ubuf))
return -EFAULT;
} else if (input_len >= len + 1) {
/* ubuf can hold the string with NULL terminator */
if (copy_to_user(ubuf, buf, len + 1))
return -EFAULT;
} else {
/* ubuf cannot hold the string with NULL terminator,
* do a partial copy with NULL terminator.
*/
char zero = '\0';
err = -ENOSPC;
if (copy_to_user(ubuf, buf, input_len - 1))
return -EFAULT;
if (put_user(zero, ubuf + input_len - 1))
return -EFAULT;
}
}
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
return -EFAULT;
return err;
}
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
static int bpf_task_fd_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (attr->task_fd_query.flags != 0)
return -EINVAL;
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
if (!files)
return -ENOENT;
err = 0;
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
err = -EBADF;
else
get_file(file);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
event = perf_get_event(file);
if (!IS_ERR(event)) {
u64 probe_offset, probe_addr;
u32 prog_id, fd_type;
const char *buf;
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
&probe_addr);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
probe_offset,
probe_addr);
goto put_file;
}
err = -ENOTSUPP;
put_file:
fput(file);
out:
return err;
}
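As a usage illustration, here is a hedged sketch of driving the new command straight through the bpf(2) syscall; the attribute fields mirror the task_fd_query block handled above, while the buffer size and error handling are illustrative only.

/* Sketch (user space): ask which BPF program and probe a given
 * perf_event or raw_tracepoint fd in a target task refers to.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int query_task_fd(pid_t pid, int fd)
{
	union bpf_attr attr;
	char buf[256];

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf_len = sizeof(buf);
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;

	if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)) < 0)
		return -1;

	printf("prog_id %u fd_type %u name %s offset 0x%llx addr 0x%llx\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long)attr.task_fd_query.probe_offset,
	       (unsigned long long)attr.task_fd_query.probe_addr);
	return 0;
}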
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{ {
union bpf_attr attr = {}; union bpf_attr attr = {};
...@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz ...@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size); err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err) if (err)
return err; return err;
size = min_t(u32, size, sizeof(attr)); size = min_t(u32, size, sizeof(attr));
...@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz ...@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID: case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr); err = bpf_btf_get_fd_by_id(&attr);
break; break;
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
default: default:
err = -EINVAL; err = -EINVAL;
break; break;
......
...@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, ...@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
switch (env->prog->type) { switch (env->prog->type) {
case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
/* dst_input() and dst_output() can't write for now */ /* dst_input() and dst_output() can't write for now */
if (t == BPF_WRITE) if (t == BPF_WRITE)
return false; return false;
...@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL) insn->src_reg != BPF_PSEUDO_CALL)
continue; continue;
subprog = insn->off; subprog = insn->off;
insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func - func[subprog]->bpf_func -
__bpf_call_base; __bpf_call_base;
} }
/* we use the aux data to keep a list of the start addresses
* of the JITed images for each function in the program
*
* for some architectures, such as powerpc64, the imm field
* might not be large enough to hold the offset of the start
* address of the callee's JITed image from __bpf_call_base
*
* in such cases, we can look up the start address of a callee
* by using its subprog id, available from the off field of
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
func[i]->aux->func_cnt = env->subprog_cnt;
} }
for (i = 0; i < env->subprog_cnt; i++) { for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func; old_bpf_func = func[i]->bpf_func;
...@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only. * later look the same as if they were interpreted only.
*/ */
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
unsigned long addr;
if (insn->code != (BPF_JMP | BPF_CALL) || if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL) insn->src_reg != BPF_PSEUDO_CALL)
continue; continue;
insn->off = env->insn_aux_data[i].call_imm; insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1); subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog]->bpf_func; insn->imm = subprog;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
} }
prog->jited = 1; prog->jited = 1;
......
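As rough orientation for how an arch JIT consumes the list set up in the hunk above, here is a hedged sketch; it assumes, per the comment above, that the call instruction's off field still carries the subprog index while the subprogs are being JITed, and it is not the exact powerpc64 code.

/* Sketch (kernel side): resolve a bpf-to-bpf callee's absolute address
 * from fp->aux->func[] when the relative imm encoding may not fit.
 */
static u64 bpf_subprog_call_addr(const struct bpf_prog *fp,
				 const struct bpf_insn *insn)
{
	if (fp->aux->func && fp->aux->func_cnt)
		/* off indexes the list of JITed subprog images */
		return (u64)(unsigned long)fp->aux->func[insn->off]->bpf_func;

	/* single-function case: imm is relative to __bpf_call_base */
	return (u64)(unsigned long)__bpf_call_base + insn->imm;
}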
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets /* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
......
...@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd) ...@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
return file; return file;
} }
const struct perf_event *perf_get_event(struct file *file)
{
if (file->f_op != &perf_fops)
return ERR_PTR(-EINVAL);
return file->private_data;
}
const struct perf_event_attr *perf_event_attrs(struct perf_event *event) const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{ {
if (!event) if (!event)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h> #include <linux/error-injection.h>
#include "trace_probe.h" #include "trace_probe.h"
...@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) ...@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
mutex_unlock(&bpf_event_mutex); mutex_unlock(&bpf_event_mutex);
return err; return err;
} }
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr)
{
bool is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
int flags, err = 0;
prog = event->prog;
if (!prog)
return -ENOENT;
/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
return -EOPNOTSUPP;
*prog_id = prog->aux->id;
flags = event->tp_event->flags;
is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
*fd_type = BPF_FD_TYPE_TRACEPOINT;
*probe_offset = 0x0;
*probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
if (flags & TRACE_EVENT_FL_KPROBE)
err = bpf_get_kprobe_info(event, fd_type, buf,
probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
probe_offset,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
return err;
}
...@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, ...@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
head, NULL); head, NULL);
} }
NOKPROBE_SYMBOL(kretprobe_perf_func); NOKPROBE_SYMBOL(kretprobe_perf_func);
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
const char **symbol, u64 *probe_offset,
u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_kprobe *tk;
if (perf_type_tracepoint)
tk = find_trace_kprobe(pevent, group);
else
tk = event->tp_event->data;
if (!tk)
return -EINVAL;
*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
: BPF_FD_TYPE_KPROBE;
if (tk->symbol) {
*symbol = tk->symbol;
*probe_offset = tk->rp.kp.offset;
*probe_addr = 0;
} else {
*symbol = NULL;
*probe_offset = 0;
*probe_addr = (unsigned long)tk->rp.kp.addr;
}
return 0;
}
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
/* /*
......
...@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, ...@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{ {
__uprobe_perf_func(tu, func, regs, ucb, dsize); __uprobe_perf_func(tu, func, regs, ucb, dsize);
} }
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_uprobe *tu;
if (perf_type_tracepoint)
tu = find_probe_event(pevent, group);
else
tu = event->tp_event->data;
if (!tu)
return -EINVAL;
*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
static int static int
......
...@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, ...@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
} }
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
static void xdp_return(void *data, struct xdp_mem_info *mem) /* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
* is used for those call sites, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{ {
struct xdp_mem_allocator *xa; struct xdp_mem_allocator *xa;
struct page *page; struct page *page;
...@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem) ...@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data); page = virt_to_head_page(data);
if (xa) if (xa)
page_pool_put_page(xa->page_pool, page); page_pool_put_page(xa->page_pool, page, napi_direct);
else else
put_page(page); put_page(page);
rcu_read_unlock(); rcu_read_unlock();
...@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem) ...@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
void xdp_return_frame(struct xdp_frame *xdpf) void xdp_return_frame(struct xdp_frame *xdpf)
{ {
xdp_return(xdpf->data, &xdpf->mem); __xdp_return(xdpf->data, &xdpf->mem, false);
} }
EXPORT_SYMBOL_GPL(xdp_return_frame); EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
__xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
void xdp_return_buff(struct xdp_buff *xdp) void xdp_return_buff(struct xdp_buff *xdp)
{ {
xdp_return(xdp->data, &xdp->rxq->mem); __xdp_return(xdp->data, &xdp->rxq->mem, true);
} }
EXPORT_SYMBOL_GPL(xdp_return_buff); EXPORT_SYMBOL_GPL(xdp_return_buff);
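A hedged sketch of the intended split between the two return variants, e.g. in a driver's XDP transmit path (the dummy_* names are hypothetical):

/* Sketch: when a driver frees an xdp_frame from its own NAPI poll/xmit
 * context it may take the faster _rx_napi recycle path; anywhere else
 * it must use xdp_return_frame().
 */
static int dummy_xmit_one(struct dummy_tx_ring *ring, struct xdp_frame *xdpf)
{
	if (dummy_ring_full(ring)) {		/* hypothetical helper */
		xdp_return_frame_rx_napi(xdpf);	/* running under NAPI here */
		return -ENOSPC;
	}
	return dummy_ring_enqueue(ring, xdpf);	/* hypothetical helper */
}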
...@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) ...@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL; return NULL;
} }
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*/
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
struct fib_info *fi = res->fi;
struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
struct net_device *dev = nh->nh_dev;
u32 mtu = 0;
if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
if (likely(!mtu)) {
struct fib_nh_exception *fnhe;
fnhe = find_exception(nh, daddr);
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
mtu = fnhe->fnhe_pmtu;
}
if (likely(!mtu))
mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
}
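A small, hedged sketch of how a caller that has already performed a fib lookup might consult the new helper (the wrapper itself is illustrative):

/* Sketch: check a packet against the MTU selected by the rules above.
 * Assumes the usual rcu_read_lock() held around the fib lookup.
 */
static bool pkt_fits_route_mtu(struct fib_result *res, __be32 daddr,
			       u32 pkt_len)
{
	return pkt_len <= ip_mtu_from_fib_result(res, daddr);
}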
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr, const bool do_cache) __be32 daddr, const bool do_cache)
{ {
......
...@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC ...@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC
If unsure, say N. If unsure, say N.
config IPV6_SEG6_BPF
def_bool y
depends on IPV6_SEG6_LWTUNNEL
depends on IPV6 = y
endif # IPV6 endif # IPV6
...@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i, ...@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
return f6i; return f6i;
} }
static u32
eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
return 0;
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.fib6_get_table = eafnosupport_fib6_get_table, .fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup, .fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select, .fib6_multipath_select = eafnosupport_fib6_multipath_select,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
}; };
EXPORT_SYMBOL_GPL(ipv6_stub); EXPORT_SYMBOL_GPL(ipv6_stub);
......
...@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = { ...@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_table_lookup = fib6_table_lookup, .fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup, .fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select, .fib6_multipath_select = fib6_multipath_select,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.udpv6_encap_enable = udpv6_encap_enable, .udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na, .ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl, .nd_tbl = &nd_tbl,
......
...@@ -2604,6 +2604,54 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) ...@@ -2604,6 +2604,54 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu); return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
} }
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
mtu = f6i->fib6_pmtu;
if (mtu)
goto out;
}
src_key = NULL;
#ifdef CONFIG_IPV6_SUBTREES
if (f6i->fib6_src.plen)
src_key = saddr;
#endif
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
struct net_device *dev = fib6_info_nh_dev(f6i);
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
if (idev && idev->cnf.mtu6 > mtu)
mtu = idev->cnf.mtu6;
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct flowi6 *fl6) struct flowi6 *fl6)
{ {
......
/* /*
* SR-IPv6 implementation * SR-IPv6 implementation
* *
* Author: * Authors:
* David Lebrun <david.lebrun@uclouvain.be> * David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
* *
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
...@@ -30,7 +31,9 @@ ...@@ -30,7 +31,9 @@
#ifdef CONFIG_IPV6_SEG6_HMAC #ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h> #include <net/seg6_hmac.h>
#endif #endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/bpf.h>
struct seg6_local_lwt; struct seg6_local_lwt;
...@@ -41,6 +44,11 @@ struct seg6_action_desc { ...@@ -41,6 +44,11 @@ struct seg6_action_desc {
int static_headroom; int static_headroom;
}; };
struct bpf_lwt_prog {
struct bpf_prog *prog;
char *name;
};
struct seg6_local_lwt { struct seg6_local_lwt {
int action; int action;
struct ipv6_sr_hdr *srh; struct ipv6_sr_hdr *srh;
...@@ -49,6 +57,7 @@ struct seg6_local_lwt { ...@@ -49,6 +57,7 @@ struct seg6_local_lwt {
struct in6_addr nh6; struct in6_addr nh6;
int iif; int iif;
int oif; int oif;
struct bpf_lwt_prog bpf;
int headroom; int headroom;
struct seg6_action_desc *desc; struct seg6_action_desc *desc;
...@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) ...@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr; *daddr = *addr;
} }
static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id) u32 tbl_id)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb); struct ipv6hdr *hdr = ipv6_hdr(skb);
...@@ -187,6 +196,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, ...@@ -187,6 +196,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
skb_dst_drop(skb); skb_dst_drop(skb);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
return dst->error;
} }
/* regular endpoint function */ /* regular endpoint function */
...@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, &slwt->nh6, 0); seg6_lookup_nexthop(skb, &slwt->nh6, 0);
return dst_input(skb); return dst_input(skb);
...@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, slwt->table); seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb); return dst_input(skb);
...@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb, ...@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6)) if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6; nhaddr = &slwt->nh6;
lookup_nexthop(skb, nhaddr, 0); seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb); return dst_input(skb);
drop: drop:
...@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb, ...@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop; goto drop;
lookup_nexthop(skb, NULL, slwt->table); seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb); return dst_input(skb);
...@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb, ...@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -447,6 +457,71 @@ static int input_action_end_b6_encap(struct sk_buff *skb, ...@@ -447,6 +457,71 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
return err; return err;
} }
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct seg6_bpf_srh_state local_srh_state;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int ret;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
* which is also accessed by the bpf_lwt_seg6_* helpers
*/
preempt_disable();
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = 1;
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
rcu_read_unlock();
local_srh_state = *srh_state;
preempt_enable();
switch (ret) {
case BPF_OK:
case BPF_REDIRECT:
break;
case BPF_DROP:
goto drop;
default:
pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
goto drop;
}
if (unlikely((local_srh_state.hdrlen & 7) != 0))
goto drop;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
goto drop;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
if (!local_srh_state.valid &&
unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
goto drop;
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
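For orientation, a minimal hedged sketch of a program that could sit behind this hook; it only returns BPF_OK, the includes mirror the in-tree sample conventions seen later in this series, and the section name is a loader convention rather than something the kernel mandates.

/* Sketch: a do-nothing End.BPF program. input_action_end_bpf() above runs
 * it per packet and accepts BPF_OK, BPF_DROP or BPF_REDIRECT as results.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int noop_end_bpf(struct __sk_buff *skb)
{
	return BPF_OK;
}

char _license[] SEC("license") = "GPL";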
static struct seg6_action_desc seg6_action_table[] = { static struct seg6_action_desc seg6_action_table[] = {
{ {
.action = SEG6_LOCAL_ACTION_END, .action = SEG6_LOCAL_ACTION_END,
...@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = { ...@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_SRH), .attrs = (1 << SEG6_LOCAL_SRH),
.input = input_action_end_b6_encap, .input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr), .static_headroom = sizeof(struct ipv6hdr),
} },
{
.action = SEG6_LOCAL_ACTION_END_BPF,
.attrs = (1 << SEG6_LOCAL_BPF),
.input = input_action_end_bpf,
},
}; };
static struct seg6_action_desc *__get_action_desc(int action) static struct seg6_action_desc *__get_action_desc(int action)
...@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { ...@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
.len = sizeof(struct in6_addr) }, .len = sizeof(struct in6_addr) },
[SEG6_LOCAL_IIF] = { .type = NLA_U32 }, [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 }, [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
}; };
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
...@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) ...@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0; return 0;
} }
#define MAX_PROG_NAME 256
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
[SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
.len = MAX_PROG_NAME },
};
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
struct bpf_prog *p;
int ret;
u32 fd;
ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
if (ret < 0)
return ret;
if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
return -EINVAL;
slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
if (!slwt->bpf.name)
return -ENOMEM;
fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
if (IS_ERR(p)) {
kfree(slwt->bpf.name);
return PTR_ERR(p);
}
slwt->bpf.prog = p;
return 0;
}
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct nlattr *nest;
if (!slwt->bpf.prog)
return 0;
nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
if (!nest)
return -EMSGSIZE;
if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
return -EMSGSIZE;
if (slwt->bpf.name &&
nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
return -EMSGSIZE;
return nla_nest_end(skb, nest);
}
static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
if (!a->bpf.name && !b->bpf.name)
return 0;
if (!a->bpf.name || !b->bpf.name)
return 1;
return strcmp(a->bpf.name, b->bpf.name);
}
struct seg6_action_param { struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
...@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { ...@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
.put = put_nla_oif, .put = put_nla_oif,
.cmp = cmp_nla_oif }, .cmp = cmp_nla_oif },
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
.cmp = cmp_nla_bpf },
}; };
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
...@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt) ...@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
kfree(slwt->srh); kfree(slwt->srh);
if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
kfree(slwt->bpf.name);
bpf_prog_put(slwt->bpf.prog);
}
return;
} }
static int seg6_local_fill_encap(struct sk_buff *skb, static int seg6_local_fill_encap(struct sk_buff *skb,
...@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) ...@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
if (attrs & (1 << SEG6_LOCAL_OIF)) if (attrs & (1 << SEG6_LOCAL_OIF))
nlsize += nla_total_size(4); nlsize += nla_total_size(4);
if (attrs & (1 << SEG6_LOCAL_BPF))
nlsize += nla_total_size(sizeof(struct nlattr)) +
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
return nlsize; return nlsize;
} }
......
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -25,39 +16,25 @@ ...@@ -25,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048 #define XDP_UMEM_MIN_FRAME_SIZE 2048
int xdp_umem_create(struct xdp_umem **umem)
{
*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
if (!(*umem))
return -ENOMEM;
return 0;
}
static void xdp_umem_unpin_pages(struct xdp_umem *umem) static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{ {
unsigned int i; unsigned int i;
if (umem->pgs) { for (i = 0; i < umem->npgs; i++) {
for (i = 0; i < umem->npgs; i++) { struct page *page = umem->pgs[i];
struct page *page = umem->pgs[i];
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs); set_page_dirty_lock(page);
umem->pgs = NULL; put_page(page);
} }
kfree(umem->pgs);
umem->pgs = NULL;
} }
static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{ {
if (umem->user) { atomic_long_sub(umem->npgs, &umem->user->locked_vm);
atomic_long_sub(umem->npgs, &umem->user->locked_vm); free_uid(umem->user);
free_uid(umem->user);
}
} }
static void xdp_umem_release(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem)
...@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem) ...@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL; umem->cq = NULL;
} }
if (umem->pgs) { xdp_umem_unpin_pages(umem);
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm); task = get_pid_task(umem->pid, PIDTYPE_PID);
umem->pgs = NULL; put_pid(umem->pid);
} if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
xdp_umem_unaccount_pages(umem); xdp_umem_unaccount_pages(umem);
out: out:
kfree(umem); kfree(umem);
...@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work) ...@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem) void xdp_get_umem(struct xdp_umem *umem)
{ {
atomic_inc(&umem->users); refcount_inc(&umem->users);
} }
void xdp_put_umem(struct xdp_umem *umem) void xdp_put_umem(struct xdp_umem *umem)
...@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem) ...@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem) if (!umem)
return; return;
if (atomic_dec_and_test(&umem->users)) { if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred); INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work); schedule_work(&umem->work);
} }
...@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) ...@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0; return 0;
} }
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{ {
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom; u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len; u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp; unsigned int nframes, nfpp;
int size_chk, err; int size_chk, err;
if (!umem)
return -EINVAL;
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) { if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if: /* Strictly speaking we could support this, if:
* - huge pages, or* * - huge pages, or*
...@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size); umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1; umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp); umem->nfpplog2 = ilog2(nfpp);
atomic_set(&umem->users, 1); refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem); err = xdp_umem_account_pages(umem);
if (err) if (err)
...@@ -254,7 +224,25 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -254,7 +224,25 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return err; return err;
} }
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
struct xdp_umem *umem;
int err;
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
err = xdp_umem_reg(umem, mr);
if (err) {
kfree(umem);
return ERR_PTR(err);
}
return umem;
}
bool xdp_umem_validate_queues(struct xdp_umem *umem) bool xdp_umem_validate_queues(struct xdp_umem *umem)
{ {
return (umem->fq && umem->cq); return umem->fq && umem->cq;
} }
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef XDP_UMEM_H_ #ifndef XDP_UMEM_H_
...@@ -36,7 +27,7 @@ struct xdp_umem { ...@@ -36,7 +27,7 @@ struct xdp_umem {
struct pid *pid; struct pid *pid;
unsigned long address; unsigned long address;
size_t size; size_t size;
atomic_t users; refcount_t users;
struct work_struct work; struct work_struct work;
}; };
...@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem, ...@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
} }
bool xdp_umem_validate_queues(struct xdp_umem *umem); bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem); void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem);
int xdp_umem_create(struct xdp_umem **umem); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */ #endif /* XDP_UMEM_H_ */
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef XDP_UMEM_PROPS_H_ #ifndef XDP_UMEM_PROPS_H_
......
...@@ -5,15 +5,6 @@ ...@@ -5,15 +5,6 @@
* applications. * applications.
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
* *
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com> * Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com> * Magnus Karlsson <magnus.karlsson@intel.com>
*/ */
...@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, ...@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out; goto out;
} }
if (xs->queue_id >= xs->dev->real_num_tx_queues) {
err = -ENXIO;
goto out;
}
skb = sock_alloc_send_skb(sk, len, !need_wait, &err); skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) { if (unlikely(!skb)) {
err = -EAGAIN; err = -EAGAIN;
...@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, ...@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q) if (!q)
return -ENOMEM; return -ENOMEM;
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
*queue = q; *queue = q;
return 0; return 0;
} }
static void __xsk_release(struct xdp_sock *xs)
{
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
}
static int xsk_release(struct socket *sock) static int xsk_release(struct socket *sock)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
...@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock) ...@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable(); local_bh_enable();
if (xs->dev) { if (xs->dev) {
__xsk_release(xs); /* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
xs->dev = NULL; xs->dev = NULL;
} }
...@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{ {
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk); struct xdp_sock *xs = xdp_sk(sk);
struct xdp_umem *old_umem = NULL; struct net_device *dev;
int err = 0; int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp)) if (addr_len < sizeof(struct sockaddr_xdp))
...@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL; return -EINVAL;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
dev_curr = xs->dev; if (xs->dev) {
err = -EBUSY;
goto out_release;
}
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) { if (!dev) {
err = -ENODEV; err = -ENODEV;
...@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock; goto out_unlock;
} }
if (sxdp->sxdp_queue_id >= dev->num_rx_queues) { if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
(xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL; err = -EINVAL;
goto out_unlock; goto out_unlock;
} }
...@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
} }
xdp_get_umem(umem_xs->umem); xdp_get_umem(umem_xs->umem);
old_umem = xs->umem;
xs->umem = umem_xs->umem; xs->umem = umem_xs->umem;
sockfd_put(sock); sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
...@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props); xskq_set_umem(xs->umem->cq, &xs->umem->props);
} }
/* Rebind? */
if (dev_curr && (dev_curr != dev ||
xs->queue_id != sxdp->sxdp_queue_id)) {
__xsk_release(xs);
if (old_umem)
xdp_put_umem(old_umem);
}
xs->dev = dev; xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id; xs->queue_id = sxdp->sxdp_queue_id;
...@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ...@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr; struct xdp_umem_reg mr;
struct xdp_umem *umem; struct xdp_umem *umem;
if (xs->umem)
return -EBUSY;
if (copy_from_user(&mr, optval, sizeof(mr))) if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT; return -EFAULT;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
err = xdp_umem_create(&umem); if (xs->umem) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
err = xdp_umem_reg(umem, &mr); umem = xdp_umem_create(&mr);
if (err) { if (IS_ERR(umem)) {
kfree(umem);
mutex_unlock(&xs->mutex); mutex_unlock(&xs->mutex);
return err; return PTR_ERR(umem);
} }
/* Make sure umem is ready before it can be seen by others */ /* Make sure umem is ready before it can be seen by others */
smp_wmb(); smp_wmb();
xs->umem = umem; xs->umem = umem;
mutex_unlock(&xs->mutex); mutex_unlock(&xs->mutex);
return 0; return 0;
...@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ...@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q; struct xsk_queue **q;
int entries; int entries;
if (!xs->umem)
return -EINVAL;
if (copy_from_user(&entries, optval, sizeof(entries))) if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT; return -EFAULT;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
}
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq; &xs->umem->cq;
err = xsk_init_queue(entries, q, true); err = xsk_init_queue(entries, q, true);
...@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, ...@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0; return 0;
} }
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
if (len < sizeof(off))
return -EINVAL;
off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.fr.desc = offsetof(struct xdp_umem_ring, desc);
off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.cr.desc = offsetof(struct xdp_umem_ring, desc);
len = sizeof(off);
if (copy_to_user(optval, &off, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
default: default:
break; break;
} }
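Because the ring layout is no longer hard-coded in the uapi, user space is expected to query these offsets before mmap()ing a ring. A hedged user-space sketch follows (error handling trimmed; the SOL_XDP fallback define mirrors what samples do when libc headers lag behind):

/* Sketch (user space): fetch the ring offsets, then map the RX ring. */
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>

#ifndef SOL_XDP
#define SOL_XDP 283	/* assumed value when libc headers lack it */
#endif

static void *map_rx_ring(int xsk_fd, unsigned int ndescs,
			 struct xdp_mmap_offsets *off)
{
	socklen_t optlen = sizeof(*off);

	if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
		return MAP_FAILED;

	/* ring size = offset of the descriptor array + the descriptors */
	return mmap(NULL, off->rx.desc + ndescs * sizeof(struct xdp_desc),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    xsk_fd, XDP_PGOFF_RX_RING);
}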
...@@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock, ...@@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk); struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL; struct xsk_queue *q = NULL;
struct xdp_umem *umem;
unsigned long pfn; unsigned long pfn;
struct page *qpg; struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) { if (offset == XDP_PGOFF_RX_RING) {
q = xs->rx; q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) { } else if (offset == XDP_PGOFF_TX_RING) {
q = xs->tx; q = READ_ONCE(xs->tx);
} else { } else {
if (!xs->umem) umem = READ_ONCE(xs->umem);
if (!umem)
return -EINVAL; return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING) if (offset == XDP_UMEM_PGOFF_FILL_RING)
q = xs->umem->fq; q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
q = xs->umem->cq; q = READ_ONCE(umem->cq);
} }
if (!q) if (!q)
...@@ -554,24 +572,24 @@ static struct proto xsk_proto = { ...@@ -554,24 +572,24 @@ static struct proto xsk_proto = {
}; };
static const struct proto_ops xsk_proto_ops = { static const struct proto_ops xsk_proto_ops = {
.family = PF_XDP, .family = PF_XDP,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.release = xsk_release, .release = xsk_release,
.bind = xsk_bind, .bind = xsk_bind,
.connect = sock_no_connect, .connect = sock_no_connect,
.socketpair = sock_no_socketpair, .socketpair = sock_no_socketpair,
.accept = sock_no_accept, .accept = sock_no_accept,
.getname = sock_no_getname, .getname = sock_no_getname,
.poll = xsk_poll, .poll = xsk_poll,
.ioctl = sock_no_ioctl, .ioctl = sock_no_ioctl,
.listen = sock_no_listen, .listen = sock_no_listen,
.shutdown = sock_no_shutdown, .shutdown = sock_no_shutdown,
.setsockopt = xsk_setsockopt, .setsockopt = xsk_setsockopt,
.getsockopt = xsk_getsockopt, .getsockopt = xsk_getsockopt,
.sendmsg = xsk_sendmsg, .sendmsg = xsk_sendmsg,
.recvmsg = sock_no_recvmsg, .recvmsg = sock_no_recvmsg,
.mmap = xsk_mmap, .mmap = xsk_mmap,
.sendpage = sock_no_sendpage, .sendpage = sock_no_sendpage,
}; };
static void xsk_destruct(struct sock *sk) static void xsk_destruct(struct sock *sk)
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure /* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/slab.h> #include <linux/slab.h>
...@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q) ...@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q) static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{ {
return (sizeof(struct xdp_ring) + return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
q->nentries * sizeof(struct xdp_desc));
} }
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
......
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space ring structure /* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef _LINUX_XSK_QUEUE_H #ifndef _LINUX_XSK_QUEUE_H
...@@ -22,6 +13,23 @@ ...@@ -22,6 +13,23 @@
#define RX_BATCH_SIZE 16 #define RX_BATCH_SIZE 16
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
u32 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue { struct xsk_queue {
struct xdp_umem_props umem_props; struct xdp_umem_props umem_props;
u32 ring_mask; u32 ring_mask;
...@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q) ...@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline bool xskq_full_desc(struct xsk_queue *q) static inline bool xskq_full_desc(struct xsk_queue *q)
{ {
return (xskq_nb_avail(q, q->nentries) == q->nentries); return xskq_nb_avail(q, q->nentries) == q->nentries;
} }
static inline bool xskq_empty_desc(struct xsk_queue *q) static inline bool xskq_empty_desc(struct xsk_queue *q)
{ {
return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries); return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
} }
void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
......
...@@ -51,6 +51,7 @@ hostprogs-y += cpustat ...@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock hostprogs-y += xdpsock
hostprogs-y += xdp_fwd hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
# Libbpf dependencies # Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
...@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o ...@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
...@@ -160,6 +162,7 @@ always += cpustat_kern.o ...@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o always += xdpsock_kern.o
always += xdp_fwd_kern.o always += xdp_fwd_kern.o
always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/lib/
...@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ ...@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt HOSTLOADLIBES_tracex4 += -lrt
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
return 0;
}
SEC("kretprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
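This kernel-side sample deliberately does nothing: it only gives the matching task_fd_query_user.c something to attach to kprobe/kretprobe perf events so that the new BPF_TASK_FD_QUERY command can be exercised. Below is a hedged sketch of what such a query looks like from user space, using the bpf_task_fd_query() wrapper added to libbpf in this series; the include path and the surrounding attach logic are assumptions, not an excerpt from the sample.

// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <linux/bpf.h>
#include "bpf/bpf.h"		/* libbpf syscall wrappers; path may differ */

/* Given a perf event fd that a BPF program was attached to, ask the kernel
 * what it is: the attached prog id, the probed symbol name and the offset. */
static void show_attachment(int pid, int fd)
{
	__u64 probe_offset = 0, probe_addr = 0;
	__u32 prog_id = 0, fd_type = 0;
	char buf[256];
	__u32 len = sizeof(buf);

	if (bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
			      &probe_offset, &probe_addr)) {
		perror("bpf_task_fd_query");
		return;
	}

	if (fd_type == BPF_FD_TYPE_KPROBE)
		printf("fd %d: prog %u on kprobe %s+0x%llx\n",
		       fd, prog_id, buf, (unsigned long long)probe_offset);
	else if (fd_type == BPF_FD_TYPE_KRETPROBE)
		printf("fd %d: prog %u on kretprobe %s\n", fd, prog_id, buf);
}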
...@@ -95,7 +95,7 @@ class HeaderParser(object): ...@@ -95,7 +95,7 @@ class HeaderParser(object):
return capture.group(1) return capture.group(1)
def parse_desc(self): def parse_desc(self):
p = re.compile(' \* ?(?:\t| {6,8})Description$') p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line) capture = p.match(self.line)
if not capture: if not capture:
# Helper can have empty description and we might be parsing another # Helper can have empty description and we might be parsing another
...@@ -109,7 +109,7 @@ class HeaderParser(object): ...@@ -109,7 +109,7 @@ class HeaderParser(object):
if self.line == ' *\n': if self.line == ' *\n':
desc += '\n' desc += '\n'
else: else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line) capture = p.match(self.line)
if capture: if capture:
desc += capture.group(1) + '\n' desc += capture.group(1) + '\n'
...@@ -118,7 +118,7 @@ class HeaderParser(object): ...@@ -118,7 +118,7 @@ class HeaderParser(object):
return desc return desc
def parse_ret(self): def parse_ret(self):
p = re.compile(' \* ?(?:\t| {6,8})Return$') p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line) capture = p.match(self.line)
if not capture: if not capture:
# Helper can have empty retval and we might be parsing another # Helper can have empty retval and we might be parsing another
...@@ -132,7 +132,7 @@ class HeaderParser(object): ...@@ -132,7 +132,7 @@ class HeaderParser(object):
if self.line == ' *\n': if self.line == ' *\n':
ret += '\n' ret += '\n'
else: else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line) capture = p.match(self.line)
if capture: if capture:
ret += capture.group(1) + '\n' ret += capture.group(1) + '\n'
......
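The regex change above relaxes the indentation the parser accepts in front of the Description and Return keywords from six-to-eight spaces to five-to-eight (a tab still works as before). For illustration, here is a schematic helper-doc comment in the layout scripts/bpf_helpers_doc.py scrapes out of include/uapi/linux/bpf.h; the helper itself is made up, only the indentation pattern is what matters.

/* A made-up helper documented in the layout the script expects: the
 * Description/Return keywords are indented by five spaces (at least six
 * were required before this change) and their bodies by a further eight,
 * instead of the usual tabs.
 *
 * int bpf_example_helper(void *ctx, u32 flags)
 *     Description
 *             Do something with *ctx* according to *flags*.
 *     Return
 *             0 on success, or a negative error in case of failure.
 */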
...@@ -16,7 +16,7 @@ SYNOPSIS ...@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version** **bpftool** **version**
*OBJECT* := { **map** | **program** | **cgroup** } *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] } | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
...@@ -30,6 +30,8 @@ SYNOPSIS ...@@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
*PERF-COMMANDS* := { **show** | **list** | **help** }
DESCRIPTION DESCRIPTION
=========== ===========
*bpftool* allows for inspection and simple modification of BPF objects *bpftool* allows for inspection and simple modification of BPF objects
...@@ -56,3 +58,4 @@ OPTIONS ...@@ -56,3 +58,4 @@ OPTIONS
SEE ALSO SEE ALSO
======== ========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
**bpftool-perf**\ (8)