Commit 90fed9c9 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-05-24

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc).

2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers.

3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.

4) Jiong Wang adds support for indirect and arithmetic shifts to the NFP JIT.

5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.

6) Mathieu Xhonneux adds an End.BPF action to seg6local, with BPF helpers that allow
   editing/growing/shrinking an SRH and applying generic SRv6 actions to a packet.

7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT.

8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events.

9) Other miscellaneous fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
...@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
...@@ -712,11 +724,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			break;

 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
+
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;

 			bpf_jit_emit_func_call(image, ctx, (u64)func);
...@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, ...@@ -864,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
return 0; return 0;
} }
struct powerpc64_jit_data {
struct bpf_binary_header *header;
u32 *addrs;
u8 *image;
u32 proglen;
struct codegen_context ctx;
};
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{ {
u32 proglen; u32 proglen;
...@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL; u8 *image = NULL;
u32 *code_base; u32 *code_base;
u32 *addrs; u32 *addrs;
struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx; struct codegen_context cgctx;
int pass; int pass;
int flen; int flen;
...@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp; struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp; struct bpf_prog *tmp_fp;
bool bpf_blinded = false; bool bpf_blinded = false;
bool extra_pass = false;
if (!fp->jit_requested) if (!fp->jit_requested)
return org_fp; return org_fp;
...@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp; fp = tmp_fp;
} }
jit_data = fp->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
fp = org_fp;
goto out;
}
fp->aux->jit_data = jit_data;
}
flen = fp->len; flen = fp->len;
addrs = jit_data->addrs;
if (addrs) {
cgctx = jit_data->ctx;
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) { if (addrs == NULL) {
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
memset(&cgctx, 0, sizeof(struct codegen_context)); memset(&cgctx, 0, sizeof(struct codegen_context));
...@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16); cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */ /* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */ /* We hit something illegal or unsupported. */
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
/* /*
...@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns); bpf_jit_fill_ill_insns);
if (!bpf_hdr) { if (!bpf_hdr) {
fp = org_fp; fp = org_fp;
goto out; goto out_addrs;
} }
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */ /* Code generation passes 1-2 */
...@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */ /* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0; cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx); bpf_jit_build_prologue(code_base, &cgctx);
bpf_jit_build_body(fp, code_base, &cgctx, addrs); bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx); bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1) if (bpf_jit_enable > 1)
...@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) ...@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen; fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
} else {
jit_data->addrs = addrs;
jit_data->ctx = cgctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = bpf_hdr;
}
out: out:
kfree(addrs);
if (bpf_blinded) if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
......
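The NOP padding in bpf_jit_emit_func_call() above is what keeps the emitted offsets stable: the normal passes see func == NULL while the extra pass patches in the real subprog address, so the call sequence must occupy the same number of instruction slots both times. A minimal sketch of that idea, outside the kernel and with a hypothetical emit_imm() callback standing in for PPC_LI64():

#include <stdint.h>

#define NOP_INSN	0x60000000u	/* ppc "ori 0,0,0" */
#define MAX_IMM_INSNS	5		/* worst case for loading a 64-bit immediate */

/* Emit an immediate load padded to a fixed number of 32-bit slots so a
 * later pass can patch in a different value without shifting the offsets
 * of any following instruction.
 */
static unsigned int emit_imm64_padded(uint32_t *image, unsigned int idx, uint64_t imm,
				      unsigned int (*emit_imm)(uint32_t *image,
							       unsigned int idx,
							       uint64_t imm))
{
	unsigned int start = idx;

	idx = emit_imm(image, idx, imm);	/* may emit 1..MAX_IMM_INSNS insns */
	while (idx - start < MAX_IMM_INSNS)	/* pad so the length is constant */
		image[idx++] = NOP_INSN;
	return idx;
}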
...@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @dev: netdev
  * @xdp: XDP buffer
  *
- * Returns Zero if sent, else an error code
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
-	int err;
+	int drops = 0;
+	int i;

 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
...@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;

-	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
-	if (err != I40E_XDP_TX)
-		return -ENOSPC;
-	return 0;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+		if (err != I40E_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }

 /**
......
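The i40e conversion above follows the new bulk ndo_xdp_xmit contract: the driver frees the frames it drops and reports how many it sent, while a negative return means nothing was consumed and the caller still owns every frame. A rough caller-side sketch of that contract (illustrative only, not the devmap/redirect code; the helper name is made up):

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Flush a bulk of collected frames to one device and honour the
 * ndo_xdp_xmit return convention described above.
 */
static int flush_bulk(struct net_device *dev, struct xdp_frame **frames, int n)
{
	int sent, i;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames);
	if (sent < 0) {
		/* Driver touched nothing: caller owns and must free all frames. */
		for (i = 0; i < n; i++)
			xdp_return_frame_rx_napi(frames[i]);
		return sent;
	}

	/* 0 <= sent <= n: the driver already freed the (n - sent) drops. */
	return n - sent;	/* number of drops, e.g. for stats/tracing */
}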
...@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); ...@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi); void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb); bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev); void i40e_xdp_flush(struct net_device *dev);
/** /**
......
...@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) ...@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
} }
} }
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
{ {
struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring; struct ixgbe_ring *ring;
int err; int drops = 0;
int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN; return -ENETDOWN;
...@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) ...@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring)) if (unlikely(!ring))
return -ENXIO; return -ENXIO;
err = ixgbe_xmit_xdp_ring(adapter, xdpf); for (i = 0; i < n; i++) {
if (err != IXGBE_XDP_TX) struct xdp_frame *xdpf = frames[i];
return -ENOSPC; int err;
return 0; err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static void ixgbe_xdp_flush(struct net_device *dev) static void ixgbe_xdp_flush(struct net_device *dev)
......
...@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) ...@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
} }
static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
bool set, bool src_lmextn)
{
u16 addr_lo, addr_hi;
u64 insn;
addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
addr_hi = addr != addr_lo;
insn = OP_BR_BIT_BASE |
FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
FIELD_PREP(OP_BR_BIT_BV, set) |
FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
u8 defer, bool set, enum nfp_relo_type relo)
{
struct nfp_insn_re_regs reg;
int err;
/* NOTE: The bit to test is specified as an rotation amount, such that
* the bit to test will be placed on the MSB of the result when
* doing a rotate right. For bit X, we need right rotate X + 1.
*/
bit += 1;
err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
if (err) {
nfp_prog->error = err;
return;
}
__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
reg.src_lmextn);
nfp_prog->prog[nfp_prog->prog_len - 1] |=
FIELD_PREP(OP_RELO_TYPE, relo);
}
static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}
static void static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
enum immed_width width, bool invert, enum immed_width width, bool invert,
...@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst, ...@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
reg.dst_lmextn, reg.src_lmextn); reg.dst_lmextn, reg.src_lmextn);
} }
static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
if (sc == SHF_SC_R_ROT) {
pr_err("indirect shift is not allowed on rotation\n");
nfp_prog->error = -EFAULT;
return;
}
emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}
static void static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
...@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0; return 0;
} }
/* Pseudo code:
* if shift_amt >= 32
* dst_high = dst_low << shift_amt[4:0]
* dst_low = 0;
* else
* dst_high = (dst_high, dst_low) >> (32 - shift_amt)
* dst_low = dst_low << shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
32 - shift_amt);
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst + 1, dst);
wrp_immed(nfp_prog, reg_both(dst), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
return 0;
}
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{ {
const struct bpf_insn *insn = &meta->insn; const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2; u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) { return __shl_imm64(nfp_prog, dst, insn->imm);
emit_shf(nfp_prog, reg_both(dst + 1), }
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, 32 - insn->imm); static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
emit_shf(nfp_prog, reg_both(dst), {
reg_none(), SHF_OP_NONE, reg_b(dst), emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
SHF_SC_L_SHF, insn->imm); reg_b(src));
} else if (insn->imm == 32) { emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
wrp_reg_mov(nfp_prog, dst + 1, dst); emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
wrp_immed(nfp_prog, reg_both(dst), 0); reg_b(dst), SHF_SC_R_DSHF);
} else if (insn->imm > 32) { }
emit_shf(nfp_prog, reg_both(dst + 1),
reg_none(), SHF_OP_NONE, reg_b(dst), /* NOTE: for indirect left shift, HIGH part should be calculated first. */
SHF_SC_L_SHF, insn->imm - 32); static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
wrp_immed(nfp_prog, reg_both(dst), 0); {
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
}
static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shl_reg64_lt32_high(nfp_prog, dst, src);
shl_reg64_lt32_low(nfp_prog, dst, src);
}
static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shl_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shl_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shl_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shl_reg64_lt32_high(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shl_reg64_lt32_low packed in delay slot. */
shl_reg64_lt32_low(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shl_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Pseudo code:
* if shift_amt >= 32
* dst_high = 0;
* dst_low = dst_high >> shift_amt[4:0]
* else
* dst_high = dst_high >> shift_amt
* dst_low = (dst_high, dst_low) >> shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} }
return 0; return 0;
...@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) ...@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn; const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2; u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) { return __shr_imm64(nfp_prog, dst, insn->imm);
emit_shf(nfp_prog, reg_both(dst), }
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, insn->imm); /* NOTE: for indirect right shift, LOW part should be calculated first. */
emit_shf(nfp_prog, reg_both(dst + 1), static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
reg_none(), SHF_OP_NONE, reg_b(dst + 1), {
SHF_SC_R_SHF, insn->imm); emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
} else if (insn->imm == 32) { emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF);
}
static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shr_reg64_lt32_low(nfp_prog, dst, src);
shr_reg64_lt32_high(nfp_prog, dst, src);
}
static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shr_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shr_reg64_lt32_high packed in delay slot. */
shr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
* told through PREV_ALU result.
*/
static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
/* Set signedness bit. */
emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
reg_imm(0));
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
/* NOTE: this also helps setting signedness bit. */
wrp_reg_mov(nfp_prog, dst, dst + 1); wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0); emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
} else if (insn->imm > 32) { reg_b(dst + 1), SHF_SC_R_SHF, 31);
emit_shf(nfp_prog, reg_both(dst), } else if (shift_amt > 32) {
reg_none(), SHF_OP_NONE, reg_b(dst + 1), emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
SHF_SC_R_SHF, insn->imm - 32); reg_imm(0));
wrp_immed(nfp_prog, reg_both(dst + 1), 0); emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
return 0;
}
static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __ashr_imm64(nfp_prog, dst, insn->imm);
}
static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: the first insn will set both indirect shift amount (source A)
* and signedness bit (MSB of result).
*/
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: it is the same as logic shift because we don't need to shift in
* signedness bit when the shift amount is less than 32.
*/
return shr_reg64_lt32_low(nfp_prog, dst, src);
}
static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
ashr_reg64_lt32_low(nfp_prog, dst, src);
ashr_reg64_lt32_high(nfp_prog, dst, src);
}
static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
/* Like ashr_imm64, but need to use indirect shift. */
static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __ashr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
ashr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
ashr_reg64_ge32(nfp_prog, dst, src);
} else {
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
ashr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* ashr_reg64_lt32_high packed in delay slot. */
ashr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
ashr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
} }
return 0; return 0;
...@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = { ...@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
[BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
[BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_NEG] = neg_reg64,
[BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
[BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
[BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
[BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
[BPF_ALU | BPF_MOV | BPF_X] = mov_reg, [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
[BPF_ALU | BPF_MOV | BPF_K] = mov_imm, [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
[BPF_ALU | BPF_XOR | BPF_X] = xor_reg, [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
......
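The shift code above follows the pseudo code in the comments: a 64-bit BPF register lives in a pair of 32-bit NFP registers, and the < 32, == 32 and > 32 cases need different sequences, with the indirect (register) variants choosing between them at runtime via the branch-on-bit-set test on bit 5 of the shift amount. A plain C model of the left-shift decomposition, purely illustrative, using two u32 halves in place of NFP registers:

#include <stdint.h>

/* Model of a 64-bit left shift over 32-bit halves (hi:lo), mirroring the
 * shl pseudo code: high half first (funnel/double shift), then low half.
 */
static void shl64_halves(uint32_t *lo, uint32_t *hi, unsigned int amt)
{
	if (amt == 0)
		return;		/* avoid the undefined lo >> 32 below */

	if (amt < 32) {
		*hi = (*hi << amt) | (*lo >> (32 - amt));	/* double-shift */
		*lo <<= amt;
	} else if (amt == 32) {
		*hi = *lo;
		*lo = 0;
	} else {		/* 32 < amt < 64 */
		*hi = *lo << (amt - 32);
		*lo = 0;
	}
}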
...@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state { ...@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
* @func_id: function id for call instructions * @func_id: function id for call instructions
* @arg1: arg1 for call instructions * @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions * @arg2: arg2 for call instructions
* @umin: copy of core verifier umin_value.
* @umax: copy of core verifier umax_value.
* @off: index of first generated machine instruction (in nfp_prog.prog) * @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number * @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags * @flags: eBPF instruction extra optimization flags
...@@ -298,6 +300,13 @@ struct nfp_insn_meta { ...@@ -298,6 +300,13 @@ struct nfp_insn_meta {
struct bpf_reg_state arg1; struct bpf_reg_state arg1;
struct nfp_bpf_reg_state arg2; struct nfp_bpf_reg_state arg2;
}; };
/* We are interested in range info for some operands,
* for example, the shift amount.
*/
struct {
u64 umin;
u64 umax;
};
}; };
unsigned int off; unsigned int off;
unsigned short n; unsigned short n;
...@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) ...@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
} }
static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
{
u8 code = meta->insn.code;
bool is_alu, is_shift;
u8 opclass, opcode;
opclass = BPF_CLASS(code);
is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
if (!is_alu)
return false;
opcode = BPF_OP(code);
is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
if (!is_shift)
return false;
return BPF_SRC(code) == BPF_X;
}
/** /**
* struct nfp_prog - nfp BPF program * struct nfp_prog - nfp BPF program
* @bpf: backpointer to the bpf app priv structure * @bpf: backpointer to the bpf app priv structure
......
...@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, ...@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
meta->insn = prog[i]; meta->insn = prog[i];
meta->n = i; meta->n = i;
if (is_mbpf_indir_shift(meta))
meta->umin = U64_MAX;
list_add_tail(&meta->l, &nfp_prog->insns); list_add_tail(&meta->l, &nfp_prog->insns);
} }
......
...@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) ...@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (is_mbpf_xadd(meta)) if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env); return nfp_bpf_check_xadd(nfp_prog, meta, env);
if (is_mbpf_indir_shift(meta)) {
const struct bpf_reg_state *sreg =
cur_regs(env) + meta->insn.src_reg;
meta->umin = min(meta->umin, sreg->umin_value);
meta->umax = max(meta->umax, sreg->umax_value);
}
return 0; return 0;
} }
......
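The verifier hook above records the unsigned range of the shift-amount register so the JIT can pick between the immediate sequences (umin == umax) and the runtime-dispatched indirect sequences. Since the same instruction can be reached along several verifier paths, the recorded bounds have to be widened on every visit, starting from umin = U64_MAX (set in nfp_prog_prepare()) and umax = 0 (assumed here to come from the zeroed meta allocation). A small sketch of that widening step, illustrative rather than the driver code:

#include <stdint.h>

struct shift_bounds {
	uint64_t umin;	/* start at UINT64_MAX so the first visit narrows it */
	uint64_t umax;	/* start at 0 so the first visit raises it */
};

/* Fold one verifier path's range for the shift amount into the bounds
 * kept for this instruction.
 */
static void record_shift_bounds(struct shift_bounds *b,
				uint64_t path_umin, uint64_t path_umax)
{
	if (path_umin < b->umin)
		b->umin = path_umin;
	if (path_umax > b->umax)
		b->umax = path_umax;
}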
...@@ -72,8 +72,21 @@ ...@@ -72,8 +72,21 @@
#define OP_BR_ADDR_LO 0x007ffc00000ULL #define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL #define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \ #define OP_BR_BIT_BASE 0x0d000000000ULL
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) #define OP_BR_BIT_BASE_MASK 0x0f800080300ULL
#define OP_BR_BIT_A_SRC 0x000000000ffULL
#define OP_BR_BIT_B_SRC 0x0000003fc00ULL
#define OP_BR_BIT_BV 0x00000040000ULL
#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL
#define OP_BR_BIT_DEFBR OP_BR_DEFBR
#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO
#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI
static inline bool nfp_is_br(u64 insn)
{
return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
(insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
}
enum br_mask { enum br_mask {
BR_BEQ = 0x00, BR_BEQ = 0x00,
...@@ -161,6 +174,7 @@ enum shf_op { ...@@ -161,6 +174,7 @@ enum shf_op {
SHF_OP_NONE = 0, SHF_OP_NONE = 0,
SHF_OP_AND = 2, SHF_OP_AND = 2,
SHF_OP_OR = 5, SHF_OP_OR = 5,
SHF_OP_ASHR = 6,
}; };
enum shf_sc { enum shf_sc {
......
...@@ -70,6 +70,7 @@ ...@@ -70,6 +70,7 @@
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/skb_array.h> #include <linux/skb_array.h>
...@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = { ...@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64, .ndo_get_stats64 = tun_net_get_stats64,
}; };
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{ {
struct tun_struct *tun = netdev_priv(dev); struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile; struct tun_file *tfile;
u32 numqueues; u32 numqueues;
int ret = 0; int drops = 0;
int cnt = n;
int i;
rcu_read_lock(); rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues); numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) { if (!numqueues) {
ret = -ENOSPC; rcu_read_unlock();
goto out; return -ENXIO; /* Caller will free/return all frames */
} }
tfile = rcu_dereference(tun->tfiles[smp_processor_id() % tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]); numqueues]);
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff. spin_lock(&tfile->tx_ring.producer_lock);
*/ for (i = 0; i < n; i++) {
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) { struct xdp_frame *xdp = frames[i];
this_cpu_inc(tun->pcpu_stats->tx_dropped); /* Encode the XDP flag into lowest bit for consumer to differ
ret = -ENOSPC; * XDP buffer from sk_buff.
*/
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
xdp_return_frame_rx_napi(xdp);
drops++;
}
} }
spin_unlock(&tfile->tx_ring.producer_lock);
out:
rcu_read_unlock(); rcu_read_unlock();
return ret; return cnt - drops;
} }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
...@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) ...@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame)) if (unlikely(!frame))
return -EOVERFLOW; return -EOVERFLOW;
return tun_xdp_xmit(dev, frame); return tun_xdp_xmit(dev, 1, &frame);
} }
static void tun_xdp_flush(struct net_device *dev) static void tun_xdp_flush(struct net_device *dev)
......
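The tun path above stores XDP frames and sk_buffs in the same ptr_ring, so the producer tags XDP pointers in their lowest bit (the "Encode the XDP flag into lowest bit" comment) and the consumer untags them before use. A hedged sketch of that tagging scheme; the flag value and helper names here are illustrative, with tun_xdp_to_ptr() being the real producer-side helper shown in the hunk:

#include <net/xdp.h>

#define XDP_PTR_FLAG	0x1UL	/* relies on xdp_frame pointers being at least 2-byte aligned */

static void *xdp_to_ptr(struct xdp_frame *xdpf)
{
	return (void *)((unsigned long)xdpf | XDP_PTR_FLAG);
}

static bool ptr_is_xdp(void *ptr)
{
	return (unsigned long)ptr & XDP_PTR_FLAG;
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~XDP_PTR_FLAG);
}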
...@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev) ...@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq); virtqueue_kick(sq->vq);
} }
static int __virtnet_xdp_xmit(struct virtnet_info *vi, static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct xdp_frame *xdpf) struct send_queue *sq,
struct xdp_frame *xdpf)
{ {
struct virtio_net_hdr_mrg_rxbuf *hdr; struct virtio_net_hdr_mrg_rxbuf *hdr;
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int err; int err;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
/* virtqueue want to use data area in-front of packet */ /* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0)) if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi, ...@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0; return 0;
} }
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}
static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
{ {
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq; struct receive_queue *rq = vi->rq;
struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int drops = 0;
int err;
int i;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated. * indicate XDP resources have been successfully allocated.
...@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) ...@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog) if (!xdp_prog)
return -ENXIO; return -ENXIO;
return __virtnet_xdp_xmit(vi, xdpf); /* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
if (err) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi) static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
...@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev, ...@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp; goto err_xdp;
...@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, ...@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page)) if (unlikely(xdp_page != page))
......
...@@ -69,8 +69,8 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
-	u32 btf_key_id;
-	u32 btf_value_id;
+	u32 btf_key_type_id;
+	u32 btf_value_type_id;
 	struct btf *btf;
 	bool unpriv_array;
 	/* 55 bytes hole */
...@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, ...@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags); int bpf_get_file_flag(int flags);
int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters. * forced to use 'long' read/writes to try to atomically copy long counters.
...@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); ...@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */ /* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct xdp_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map); void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map); void __cpu_map_flush(struct bpf_map *map);
struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx); struct net_device *dev_rx);
...@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map) ...@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
struct bpf_dtab_netdev;
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}
static inline static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{ {
...@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map) ...@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp, struct xdp_buff *xdp,
struct net_device *dev_rx) struct net_device *dev_rx)
......
...@@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
......
...@@ -517,6 +517,7 @@ struct sk_msg_buff { ...@@ -517,6 +517,7 @@ struct sk_msg_buff {
bool sg_copy[MAX_SKB_FRAGS]; bool sg_copy[MAX_SKB_FRAGS];
__u32 flags; __u32 flags;
struct sock *sk_redir; struct sock *sk_redir;
struct sock *sk;
struct sk_buff *skb; struct sk_buff *skb;
struct list_head list; struct list_head list;
}; };
......
...@@ -1185,9 +1185,13 @@ struct dev_ifalias {
  * This function is used to set or query state related to XDP on the
  * netdevice and manage BPF offload. See definition of
  * enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns number of frames successfully transmitted, frames
+ * that got dropped are freed/returned via xdp_return_frame().
+ * Returns negative number, means general error invoking ndo, meaning
+ * no frames were xmit'ed and core-caller will free all frames.
+ * TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  * This function is used to inform the driver to flush a particular
  * xdp tx queue. Must be called on same CPU as xdp_xmit.
...@@ -1375,8 +1379,8 @@ struct net_device_ops {
 						       int needed_headroom);
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
-	int			(*ndo_xdp_xmit)(struct net_device *dev,
-						struct xdp_frame *xdp);
+	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
+						struct xdp_frame **xdp);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
......
...@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child); ...@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task); extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd); extern struct file *perf_event_get(unsigned int fd);
extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void); extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_disable(struct pmu *pmu);
...@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { } ...@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
static inline const struct perf_event *perf_get_event(struct file *file)
{
return ERR_PTR(-EINVAL);
}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
......
...@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info); ...@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr);
#else #else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{ {
...@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name ...@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{ {
return NULL; return NULL;
} }
static inline int bpf_get_perf_event_info(const struct perf_event *event,
u32 *prog_id, u32 *fd_type,
const char **buf, u64 *probe_offset,
u64 *probe_addr)
{
return -EOPNOTSUPP;
}
#endif #endif
enum { enum {
...@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags); ...@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS #ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event); extern void perf_kprobe_destroy(struct perf_event *event);
extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
bool perf_type_tracepoint);
#endif #endif
#ifdef CONFIG_UPROBE_EVENTS #ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event); extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
u64 *probe_offset, bool perf_type_tracepoint);
#endif #endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str); char *filter_str);
......
...@@ -236,6 +236,8 @@ struct ipv6_stub { ...@@ -236,6 +236,8 @@ struct ipv6_stub {
struct flowi6 *fl6, int oif, struct flowi6 *fl6, int oif,
const struct sk_buff *skb, const struct sk_buff *skb,
int strict); int strict);
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
void (*udpv6_encap_enable)(void); void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
......
...@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) ...@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
return f6i->fib6_nh.nh_dev; return f6i->fib6_nh.nh_dev;
} }
static inline
struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
{
return f6i->fib6_nh.nh_lwtstate;
}
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags); unsigned int flags);
......
...@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) ...@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu; return mtu;
} }
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb, struct net_device *dev, struct sk_buff *skb,
const void *daddr); const void *daddr);
......
...@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net) ...@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
} }
#endif #endif
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
#endif /* _NET_FIB_H */ #endif /* _NET_FIB_H */
...@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct);

-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+				      struct page *page, bool allow_direct)
 {
 	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 	 */
 #ifdef CONFIG_PAGE_POOL
-	__page_pool_put_page(pool, page, false);
+	__page_pool_put_page(pool, page, allow_direct);
 #endif
 }

 /* Very limited use-cases allow recycle direct */
......
...@@ -49,7 +49,11 @@ struct seg6_pernet_data { ...@@ -49,7 +49,11 @@ struct seg6_pernet_data {
static inline struct seg6_pernet_data *seg6_pernet(struct net *net) static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
{ {
#if IS_ENABLED(CONFIG_IPV6)
return net->ipv6.seg6_data; return net->ipv6.seg6_data;
#else
return NULL;
#endif
} }
extern int seg6_init(void); extern int seg6_init(void);
...@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); ...@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto); int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
#endif #endif
/*
* SR-IPv6 implementation
*
* Authors:
* David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _NET_SEG6_LOCAL_H
#define _NET_SEG6_LOCAL_H
#include <linux/percpu.h>
#include <linux/net.h>
#include <linux/ipv6.h>
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
struct seg6_bpf_srh_state {
bool valid;
u16 hdrlen;
};
DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
#endif
...@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) ...@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
} }
void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp); void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
......
-/* SPDX-License-Identifier: GPL-2.0
- * AF_XDP internal functions
+/* SPDX-License-Identifier: GPL-2.0 */
+/* AF_XDP internal functions
  * Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */

 #ifndef _LINUX_XDP_SOCK_H
......
...@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, ...@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index) __entry->map_id, __entry->map_index)
); );
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
 	 (!map ? 0 :						\
 	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct net_device *)fwd)->ifindex : 0)))
+	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
...@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue, ...@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu) __entry->to_cpu)
); );
TRACE_EVENT(xdp_devmap_xmit,
TP_PROTO(const struct bpf_map *map, u32 map_index,
int sent, int drops,
const struct net_device *from_dev,
const struct net_device *to_dev, int err),
TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
TP_STRUCT__entry(
__field(int, map_id)
__field(u32, act)
__field(u32, map_index)
__field(int, drops)
__field(int, sent)
__field(int, from_ifindex)
__field(int, to_ifindex)
__field(int, err)
),
TP_fast_assign(
__entry->map_id = map->id;
__entry->act = XDP_REDIRECT;
__entry->map_index = map_index;
__entry->drops = drops;
__entry->sent = sent;
__entry->from_ifindex = from_dev->ifindex;
__entry->to_ifindex = to_dev->ifindex;
__entry->err = err;
),
TP_printk("ndo_xdp_xmit"
" map_id=%d map_index=%d action=%s"
" sent=%d drops=%d"
" from_ifindex=%d to_ifindex=%d err=%d",
__entry->map_id, __entry->map_index,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->sent, __entry->drops,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
#endif /* _TRACE_XDP_H */ #endif /* _TRACE_XDP_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>
...@@ -97,6 +97,7 @@ enum bpf_cmd { ...@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN, BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD, BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID, BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
}; };
enum bpf_map_type { enum bpf_map_type {
...@@ -141,6 +142,7 @@ enum bpf_prog_type { ...@@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -284,8 +286,8 @@ union bpf_attr { ...@@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN]; char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */ __u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */ __u32 btf_fd; /* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
}; };
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
...@@ -379,6 +381,22 @@ union bpf_attr { ...@@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size; __u32 btf_log_size;
__u32 btf_log_level; __u32 btf_log_level;
}; };
struct {
__u32 pid; /* input: pid */
__u32 fd; /* input: fd */
__u32 flags; /* input: flags */
__u32 buf_len; /* input/output: buf len */
__aligned_u64 buf; /* input/output:
* tp_name for tracepoint
* symbol for kprobe
* filename for uprobe
*/
		__u32 prog_id;		/* output: prog_id */
__u32 fd_type; /* output: BPF_FD_TYPE_* */
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
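As an illustration only (not part of this patch), the new command is driven from user space roughly as follows; the buffer size, error handling and wrapper name are arbitrary:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int query_task_fd(pid_t pid, int fd, char *buf, __u32 buf_len)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.task_fd_query.pid = pid;
		attr.task_fd_query.fd = fd;
		attr.task_fd_query.buf = (__u64)(unsigned long)buf;
		attr.task_fd_query.buf_len = buf_len;

		/* On success the kernel fills in prog_id, fd_type, probe_offset
		 * and probe_addr, and copies the tp/kprobe/uprobe name into buf.
		 */
		if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
			return -1;

		printf("prog_id=%u fd_type=%u name=%s\n",
		       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf);
		return 0;
	}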
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF /* The description below is an attempt at providing documentation to eBPF
...@@ -1902,6 +1920,90 @@ union bpf_attr { ...@@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now. * egress otherwise). This is the only flag supported for now.
* Return * Return
* **SK_PASS** on success, or **SK_DROP** on error. * **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
* address *hdr*, with *len* its size in bytes. *type* indicates
* the protocol of the header and can be one of:
*
* **BPF_LWT_ENCAP_SEG6**
* IPv6 encapsulation with Segment Routing Header
* (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
* the IPv6 header is computed by the kernel.
* **BPF_LWT_ENCAP_SEG6_INLINE**
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
* inside the outermost IPv6 Segment Routing Header can be
* modified through this helper.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
* *skb*, at position *offset* by *delta* bytes. Only offsets
 *		after the segments are accepted. *delta* can be either
 *		positive (growing) or negative (shrinking).
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
* contained at address *param*, and of length *param_len* bytes.
* *action* can be one of:
*
* **SEG6_LOCAL_ACTION_END_X**
* End.X action: Endpoint with Layer-3 cross-connect.
* Type of *param*: **struct in6_addr**.
* **SEG6_LOCAL_ACTION_END_T**
* End.T action: Endpoint with specific IPv6 table lookup.
* Type of *param*: **int**.
* **SEG6_LOCAL_ACTION_END_B6**
* End.B6 action: Endpoint bound to an SRv6 policy.
* Type of param: **struct ipv6_sr_hdr**.
* **SEG6_LOCAL_ACTION_END_B6_ENCAP**
* End.B6.Encap action: Endpoint bound to an SRv6
* encapsulation policy.
* Type of param: **struct ipv6_sr_hdr**.
*
 *		A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*/ */
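To make the new seg6local hooks concrete, here is a minimal illustrative End.BPF program. It is not part of this patch; the include paths, section name, SEC() macro and helper declarations follow the kernel selftests' bpf_helpers.h, and the table id is an arbitrary example:

	#include <linux/bpf.h>
	#include <linux/seg6_local.h>
	#include "bpf_helpers.h"

	SEC("lwt_seg6local")
	int end_bpf_prog(struct __sk_buff *skb)
	{
		int table_id = 100;	/* example routing table for End.T */

		/* Hand the packet over to the kernel's End.T behaviour; the
		 * parameter is the table id, as documented above.
		 */
		if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
					&table_id, sizeof(table_id)) < 0)
			return BPF_DROP;

		return BPF_OK;
	}

	char _license[] SEC("license") = "GPL";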
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -1976,7 +2078,11 @@ union bpf_attr { ...@@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \ FN(fib_lookup), \
FN(sock_hash_update), \ FN(sock_hash_update), \
FN(msg_redirect_hash), \ FN(msg_redirect_hash), \
-	FN(sk_redirect_hash),
+	FN(sk_redirect_hash),		\
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off { ...@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET, BPF_HDR_START_NET,
}; };
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
};
/* user accessible mirror of in-kernel sk_buff. /* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure * new fields can only be added to the end of this structure
*/ */
...@@ -2176,6 +2288,14 @@ enum sk_action { ...@@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md { struct sk_msg_md {
void *data; void *data;
void *data_end; void *data_end;
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
__u32 remote_ip6[4]; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
}; };
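For illustration (not part of this patch), an SK_MSG program can now branch on the socket's address fields; the section name and port value are arbitrary, and SEC() comes from the selftests' bpf_helpers.h:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("sk_msg")
	int msg_by_port(struct sk_msg_md *msg)
	{
		/* local_port is host byte order, the remote_* and local_ip*
		 * fields are network byte order, per the uapi comments above.
		 */
		if (msg->family == 2 /* AF_INET */ && msg->local_port == 8000)
			return SK_DROP;

		return SK_PASS;
	}

	char _license[] SEC("license") = "GPL";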
#define BPF_TAG_SIZE 8 #define BPF_TAG_SIZE 8
...@@ -2197,6 +2317,10 @@ struct bpf_prog_info { ...@@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1; __u32 gpl_compatible:1;
__u64 netns_dev; __u64 netns_dev;
__u64 netns_ino; __u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
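Not part of this patch: a rough user-space sketch of consuming the new fields via BPF_OBJ_GET_INFO_BY_FD, here through libbpf's bpf_obj_get_info_by_fd(); the include path depends on how libbpf is built/installed:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <bpf/bpf.h>

	static void dump_jited_ksyms(int prog_fd)
	{
		struct bpf_prog_info info = {};
		__u32 len = sizeof(info);
		__u64 *ksyms;
		__u32 i, nr;

		/* First call: learn how many JITed functions the program has */
		if (bpf_obj_get_info_by_fd(prog_fd, &info, &len) || !info.nr_jited_ksyms)
			return;

		nr = info.nr_jited_ksyms;
		ksyms = calloc(nr, sizeof(*ksyms));
		if (!ksyms)
			return;

		memset(&info, 0, sizeof(info));
		info.nr_jited_ksyms = nr;
		info.jited_ksyms = (__u64)(unsigned long)ksyms;
		len = sizeof(info);

		/* Second call: fetch one (page-masked) ksym address per function */
		if (!bpf_obj_get_info_by_fd(prog_fd, &info, &len))
			for (i = 0; i < nr; i++)
				printf("func %u @ 0x%llx\n", i,
				       (unsigned long long)ksyms[i]);

		free(ksyms);
	}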
struct bpf_map_info { struct bpf_map_info {
...@@ -2211,8 +2335,8 @@ struct bpf_map_info { ...@@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev; __u64 netns_dev;
__u64 netns_ino; __u64 netns_ino;
__u32 btf_id; __u32 btf_id;
-	__u32 btf_key_id;
-	__u32 btf_value_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
struct bpf_btf_info { struct bpf_btf_info {
...@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup { ...@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */
}; };
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_UPROBE, /* filename + offset */
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -12,42 +12,29 @@ struct btf_header { ...@@ -12,42 +12,29 @@ struct btf_header {
 	__u16	magic;
 	__u8	version;
 	__u8	flags;
-	__u32	parent_label;
-	__u32	parent_name;
+	__u32	hdr_len;
 
 	/* All offsets are in bytes relative to the end of this header */
-	__u32	label_off;	/* offset of label section	*/
-	__u32	object_off;	/* offset of data object section*/
-	__u32	func_off;	/* offset of function section	*/
 	__u32	type_off;	/* offset of type section	*/
+	__u32	type_len;	/* length of type section	*/
 	__u32	str_off;	/* offset of string section	*/
 	__u32	str_len;	/* length of string section	*/
 };
 /* Max # of type identifier */
-#define BTF_MAX_TYPE	0x7fffffff
+#define BTF_MAX_TYPE	0x0000ffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET	0x7fffffff
+#define BTF_MAX_NAME_OFFSET	0x0000ffff
/* Max # of struct/union/enum members or func args */ /* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff #define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type { struct btf_type {
__u32 name_off; __u32 name_off;
/* "info" bits arrangement /* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members) * bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused * bits 16-23: unused
-	 * bits 24-28: kind (e.g. int, ptr, array...etc)
-	 * bits 29-30: unused
-	 * bits 31:    root
+	 * bits 24-27: kind (e.g. int, ptr, array...etc)
+	 * bits 28-31: unused
 	 */
__u32 info; __u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION. /* "size" is used by INT, ENUM, STRUCT and UNION.
...@@ -62,8 +49,7 @@ struct btf_type { ...@@ -62,8 +49,7 @@ struct btf_type {
}; };
}; };
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
-#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_UNKN 0 /* Unknown */
...@@ -88,15 +74,14 @@ struct btf_type { ...@@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following /* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement: * is the 32 bits arrangement:
*/ */
-#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
 #define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)
 
 /* Attributes stored in the BTF_INT_ENCODING */
-#define BTF_INT_SIGNED	0x1
-#define BTF_INT_CHAR	0x2
-#define BTF_INT_BOOL	0x4
-#define BTF_INT_VARARGS	0x8
+#define BTF_INT_SIGNED	(1 << 0)
+#define BTF_INT_CHAR	(1 << 1)
+#define BTF_INT_BOOL	(1 << 2)
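A worked example, illustrative only and assuming the freshly exported uapi header: with the tightened masks above, an int_data word of 0x01000020 describes a plain signed 32-bit integer:

	#include <stdio.h>
	#include <linux/btf.h>

	int main(void)
	{
		unsigned int int_data = 0x01000020;	/* example descriptor word */

		/* (0x01000020 & 0x0f000000) >> 24 == 1, i.e. BTF_INT_SIGNED */
		printf("encoding=%u offset=%u nr_bits=%u\n",
		       BTF_INT_ENCODING(int_data),
		       BTF_INT_OFFSET(int_data),	/* 0: not a bit-field */
		       BTF_INT_BITS(int_data));		/* 32 bits wide */
		return 0;
	}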
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". /* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the * The exact number of btf_enum is stored in the vlen (of the
......
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
- *
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
  * if_xdp: XDP socket user-space interface
  * Copyright(c) 2018 Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * Author(s): Björn Töpel <bjorn.topel@intel.com>
  *	      Magnus Karlsson <magnus.karlsson@intel.com>
  */
...@@ -26,19 +17,33 @@ ...@@ -26,19 +17,33 @@
 struct sockaddr_xdp {
 	__u16 sxdp_family;
+	__u16 sxdp_flags;
 	__u32 sxdp_ifindex;
 	__u32 sxdp_queue_id;
 	__u32 sxdp_shared_umem_fd;
-	__u16 sxdp_flags;
 };
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
};
struct xdp_mmap_offsets {
struct xdp_ring_offset rx;
struct xdp_ring_offset tx;
struct xdp_ring_offset fr; /* Fill */
struct xdp_ring_offset cr; /* Completion */
}; };
 /* XDP socket options */
-#define XDP_RX_RING			1
-#define XDP_TX_RING			2
-#define XDP_UMEM_REG			3
-#define XDP_UMEM_FILL_RING		4
-#define XDP_UMEM_COMPLETION_RING	5
-#define XDP_STATISTICS			6
+#define XDP_MMAP_OFFSETS		1
+#define XDP_RX_RING			2
+#define XDP_TX_RING			3
+#define XDP_UMEM_REG			4
+#define XDP_UMEM_FILL_RING		5
+#define XDP_UMEM_COMPLETION_RING	6
+#define XDP_STATISTICS			7
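A user-space sketch (not part of this patch) of the new contract: ring geometry is queried with XDP_MMAP_OFFSETS and the returned offsets locate the producer/consumer/descriptor fields inside the mmap()'ed area. The descriptor count is caller-chosen and the SOL_XDP fallback define mirrors what the xdpsock sample does:

	#include <stddef.h>
	#include <sys/mman.h>
	#include <sys/socket.h>
	#include <linux/if_xdp.h>

	#ifndef SOL_XDP
	#define SOL_XDP 283		/* as in the xdpsock sample */
	#endif

	static void *xsk_map_rx_ring(int xsk_fd, unsigned int ndescs,
				     struct xdp_mmap_offsets *off)
	{
		socklen_t optlen = sizeof(*off);
		void *map;

		/* Ask the kernel where the ring fields live in the mapping */
		if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
			return NULL;

		map = mmap(NULL, off->rx.desc + ndescs * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			   xsk_fd, XDP_PGOFF_RX_RING);

		return map == MAP_FAILED ? NULL : map;
	}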
struct xdp_umem_reg { struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */ __u64 addr; /* Start of packet data area */
...@@ -59,6 +64,7 @@ struct xdp_statistics { ...@@ -59,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000 #define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
/* Rx/Tx descriptor */
struct xdp_desc { struct xdp_desc {
__u32 idx; __u32 idx;
__u32 len; __u32 len;
...@@ -67,21 +73,6 @@ struct xdp_desc { ...@@ -67,21 +73,6 @@ struct xdp_desc {
__u8 padding[5]; __u8 padding[5];
}; };
-struct xdp_ring {
-	__u32 producer __attribute__((aligned(64)));
-	__u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
-	struct xdp_ring ptrs;
-	struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
-	struct xdp_ring ptrs;
-	__u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
 
 #endif /* _LINUX_IF_XDP_H */
...@@ -25,6 +25,7 @@ enum { ...@@ -25,6 +25,7 @@ enum {
SEG6_LOCAL_NH6, SEG6_LOCAL_NH6,
SEG6_LOCAL_IIF, SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF, SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
__SEG6_LOCAL_MAX, __SEG6_LOCAL_MAX,
}; };
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
...@@ -59,10 +60,21 @@ enum { ...@@ -59,10 +60,21 @@ enum {
SEG6_LOCAL_ACTION_END_AS = 13, SEG6_LOCAL_ACTION_END_AS = 13,
/* forward to SR-unaware VNF with masquerading */ /* forward to SR-unaware VNF with masquerading */
SEG6_LOCAL_ACTION_END_AM = 14, SEG6_LOCAL_ACTION_END_AM = 14,
/* custom BPF action */
SEG6_LOCAL_ACTION_END_BPF = 15,
__SEG6_LOCAL_ACTION_MAX, __SEG6_LOCAL_ACTION_MAX,
}; };
#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) #define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
enum {
SEG6_LOCAL_BPF_PROG_UNSPEC,
SEG6_LOCAL_BPF_PROG,
SEG6_LOCAL_BPF_PROG_NAME,
__SEG6_LOCAL_BPF_PROG_MAX,
};
#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
#endif #endif
...@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, ...@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
} }
seq_printf(m, "%u: ", *(u32 *)key); seq_printf(m, "%u: ", *(u32 *)key);
-	btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n"); seq_puts(m, "\n");
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h> #include <linux/bpf_verifier.h>
#include <linux/btf.h> #include <linux/btf.h>
...@@ -162,13 +163,16 @@ ...@@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \ #define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x0f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
/* 16MB for 64k structs and each has 16 members and /* 16MB for 64k structs and each has 16 members and
* a few MB spaces for the string section. * a few MB spaces for the string section.
* The hard limit is S32_MAX. * The hard limit is S32_MAX.
*/ */
#define BTF_MAX_SIZE (16 * 1024 * 1024) #define BTF_MAX_SIZE (16 * 1024 * 1024)
/* 64k. We can raise it later. The hard limit is S32_MAX. */
#define BTF_MAX_NR_TYPES 65535
#define for_each_member(i, struct_type, member) \ #define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \ for (i = 0, member = btf_type_member(struct_type); \
...@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr); ...@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock); static DEFINE_SPINLOCK(btf_idr_lock);
struct btf { struct btf {
union { void *data;
struct btf_header *hdr;
void *data;
};
struct btf_type **types; struct btf_type **types;
u32 *resolved_ids; u32 *resolved_ids;
u32 *resolved_sizes; u32 *resolved_sizes;
const char *strings; const char *strings;
void *nohdr_data; void *nohdr_data;
struct btf_header hdr;
u32 nr_types; u32 nr_types;
u32 types_size; u32 types_size;
u32 data_size; u32 data_size;
...@@ -228,6 +230,11 @@ enum resolve_mode { ...@@ -228,6 +230,11 @@ enum resolve_mode {
#define MAX_RESOLVE_DEPTH 32 #define MAX_RESOLVE_DEPTH 32
struct btf_sec_info {
u32 off;
u32 len;
};
struct btf_verifier_env { struct btf_verifier_env {
struct btf *btf; struct btf *btf;
u8 *visit_states; u8 *visit_states;
...@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding) ...@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
return "CHAR"; return "CHAR";
else if (encoding == BTF_INT_BOOL) else if (encoding == BTF_INT_BOOL)
return "BOOL"; return "BOOL";
else if (encoding == BTF_INT_VARARGS)
return "VARARGS";
else else
return "UNKN"; return "UNKN";
} }
...@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) ...@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset) static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{ {
-	return !BTF_STR_TBL_ELF_ID(offset) &&
-		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+	return BTF_STR_OFFSET_VALID(offset) &&
+		offset < btf->hdr.str_len;
 }
 
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
-	if (!BTF_STR_OFFSET(offset))
+	if (!offset)
 		return "(anon)";
-	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
-		return &btf->strings[BTF_STR_OFFSET(offset)];
+	else if (offset < btf->hdr.str_len)
+		return &btf->strings[offset];
 	else
 		return "(invalid-name-offset)";
} }
...@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) ...@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id]; return btf->types[type_id];
} }
/*
* Regular int is not a bit field and it must be either
* u8/u16/u32/u64.
*/
static bool btf_type_int_is_regular(const struct btf_type *t)
{
u16 nr_bits, nr_bytes;
u32 int_data;
int_data = btf_type_int(t);
nr_bits = BTF_INT_BITS(int_data);
nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
return false;
}
return true;
}
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...) const char *fmt, ...)
{ {
...@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, ...@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n"); __btf_verifier_log(log, "\n");
} }
-static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+static void btf_verifier_log_hdr(struct btf_verifier_env *env,
+				 u32 btf_data_size)
{ {
struct bpf_verifier_log *log = &env->log; struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf; const struct btf *btf = env->btf;
...@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env) ...@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log)) if (!bpf_verifier_log_needed(log))
return; return;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
 	__btf_verifier_log(log, "version: %u\n", hdr->version);
 	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
-	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
-	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
-	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
-	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
-	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
 	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
 	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
 	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
-	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
} }
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
...@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) ...@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
struct btf_type **new_types; struct btf_type **new_types;
u32 expand_by, new_size; u32 expand_by, new_size;
if (btf->types_size == BTF_MAX_NR_TYPES) { if (btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types"); btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG; return -E2BIG;
} }
expand_by = max_t(u32, btf->types_size >> 2, 16); expand_by = max_t(u32, btf->types_size >> 2, 16);
new_size = min_t(u32, BTF_MAX_NR_TYPES, new_size = min_t(u32, BTF_MAX_TYPE,
btf->types_size + expand_by); btf->types_size + expand_by);
new_types = kvzalloc(new_size * sizeof(*new_types), new_types = kvzalloc(new_size * sizeof(*new_types),
...@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, ...@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
} }
int_data = btf_type_int(t); int_data = btf_type_int(t);
if (int_data & ~BTF_INT_MASK) {
btf_verifier_log_basic(env, t, "Invalid int_data:%x",
int_data);
return -EINVAL;
}
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
if (nr_bits > BITS_PER_U64) { if (nr_bits > BITS_PER_U64) {
...@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, ...@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
/*
* Only one of the encoding bits is allowed and it
* should be sufficient for the pretty print purpose (i.e. decoding).
* Multiple bits can be allowed later if it is found
* to be insufficient.
*/
encoding = BTF_INT_ENCODING(int_data); encoding = BTF_INT_ENCODING(int_data);
if (encoding && if (encoding &&
encoding != BTF_INT_SIGNED && encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR && encoding != BTF_INT_CHAR &&
-	    encoding != BTF_INT_BOOL &&
-	    encoding != BTF_INT_VARARGS) {
+	    encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding"); btf_verifier_log_type(env, t, "Unsupported encoding");
return -ENOTSUPP; return -ENOTSUPP;
} }
...@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, ...@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	if (BTF_TYPE_PARENT(t->type)) {
+	if (!BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id"); btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL; return -EINVAL;
} }
...@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env, ...@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	/* We are a little forgiving on array->index_type since
-	 * the kernel is not using it.
-	 */
-	/* Array elem cannot be in type void,
-	 * so !array->type is not allowed.
+	/* Array elem type and index type cannot be in type void,
+	 * so !array->type and !array->index_type are not allowed.
 	 */
-	if (!array->type || BTF_TYPE_PARENT(array->type)) {
-		btf_verifier_log_type(env, t, "Invalid type_id");
+	if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
+		btf_verifier_log_type(env, t, "Invalid elem");
return -EINVAL;
}
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL; return -EINVAL;
} }
...@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env, ...@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v) const struct resolve_vertex *v)
{ {
 	const struct btf_array *array = btf_type_array(v->t);
-	const struct btf_type *elem_type;
-	u32 elem_type_id = array->type;
+	const struct btf_type *elem_type, *index_type;
+	u32 elem_type_id, index_type_id;
 	struct btf *btf = env->btf;
 	u32 elem_size;
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
if (btf_type_is_void_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, index_type) &&
!env_type_is_resolved(env, index_type_id))
return env_stack_push(env, index_type, index_type_id);
index_type = btf_type_id_size(btf, &index_type_id, NULL);
if (!index_type || !btf_type_is_int(index_type) ||
!btf_type_int_is_regular(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id); elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) { if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t, btf_verifier_log_type(env, v->t,
...@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env, ...@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL; return -EINVAL;
} }
-	if (btf_type_is_int(elem_type)) {
-		int int_type_data = btf_type_int(elem_type);
-		u16 nr_bits = BTF_INT_BITS(int_type_data);
-		u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
-
-		/* Put more restriction on array of int.  The int cannot
-		 * be a bit field and it must be either u8/u16/u32/u64.
-		 */
-		if (BITS_PER_BYTE_MASKED(nr_bits) ||
-		    BTF_INT_OFFSET(int_type_data) ||
-		    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-		     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
-			btf_verifier_log_type(env, v->t,
-					      "Invalid array of int");
-			return -EINVAL;
-		}
+	if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
+		btf_verifier_log_type(env, v->t, "Invalid array of int");
+		return -EINVAL;
 	}
if (array->nelems && elem_size > U32_MAX / array->nelems) { if (array->nelems && elem_size > U32_MAX / array->nelems) {
...@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, ...@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
} }
/* A member cannot be in type void */ /* A member cannot be in type void */
-		if (!member->type || BTF_TYPE_PARENT(member->type)) {
+		if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member, btf_verifier_log_member(env, t, member,
"Invalid type_id"); "Invalid type_id");
return -EINVAL; return -EINVAL;
...@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env, ...@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
} }
meta_left -= sizeof(*t); meta_left -= sizeof(*t);
if (t->info & ~BTF_INFO_MASK) {
btf_verifier_log(env, "[%u] Invalid btf_info:%x",
env->log_type_id, t->info);
return -EINVAL;
}
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
btf_verifier_log(env, "[%u] Invalid kind:%u", btf_verifier_log(env, "[%u] Invalid kind:%u",
...@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env) ...@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr; struct btf_header *hdr;
void *cur, *end; void *cur, *end;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
 	cur = btf->nohdr_data + hdr->type_off;
-	end = btf->nohdr_data + hdr->str_off;
+	end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1; env->log_type_id = 1;
while (cur < end) { while (cur < end) {
...@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env) ...@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env) static int btf_parse_type_sec(struct btf_verifier_env *env)
{ {
const struct btf_header *hdr = &env->btf->hdr;
int err; int err;
/* Type section must align to 4 bytes */
if (hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Unaligned type_off");
return -EINVAL;
}
if (!hdr->type_len) {
btf_verifier_log(env, "No type found");
return -EINVAL;
}
err = btf_check_all_metas(env); err = btf_check_all_metas(env);
if (err) if (err)
return err; return err;
...@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) ...@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf; struct btf *btf = env->btf;
const char *start, *end; const char *start, *end;
-	hdr = btf->hdr;
+	hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off; start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len; end = start + hdr->str_len;
if (end != btf->data + btf->data_size) {
btf_verifier_log(env, "String section is not at the end");
return -EINVAL;
}
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) { start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section"); btf_verifier_log(env, "Invalid string section");
...@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) ...@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0; return 0;
} }
static int btf_parse_hdr(struct btf_verifier_env *env) static const size_t btf_sec_info_offset[] = {
offsetof(struct btf_header, type_off),
offsetof(struct btf_header, str_off),
};
static int btf_sec_info_cmp(const void *a, const void *b)
{
const struct btf_sec_info *x = a;
const struct btf_sec_info *y = b;
return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}
static int btf_check_sec_info(struct btf_verifier_env *env,
u32 btf_data_size)
{ {
struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
u32 total, expected_total, i;
 	const struct btf_header *hdr;
-	struct btf *btf = env->btf;
-	u32 meta_left;
+	const struct btf *btf;
+
+	btf = env->btf;
+	hdr = &btf->hdr;
 
-	if (btf->data_size < sizeof(*hdr)) {
+	/* Populate the secs from hdr */
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
sort(secs, ARRAY_SIZE(btf_sec_info_offset),
sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len;
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset");
return -EINVAL;
}
if (total < secs[i].off) {
/* gap */
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
if (total > secs[i].off) {
btf_verifier_log(env, "Section overlap found");
return -EINVAL;
}
if (expected_total - total < secs[i].len) {
btf_verifier_log(env,
"Total section length too long");
return -EINVAL;
}
total += secs[i].len;
}
/* There is data other than hdr and known sections */
if (expected_total != total) {
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
u32 btf_data_size)
{
const struct btf_header *hdr;
u32 hdr_len, hdr_copy;
/*
* Minimal part of the "struct btf_header" that
* contains the hdr_len.
*/
struct btf_min_header {
u16 magic;
u8 version;
u8 flags;
u32 hdr_len;
} __user *min_hdr;
struct btf *btf;
int err;
btf = env->btf;
min_hdr = btf_data;
if (btf_data_size < sizeof(*min_hdr)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
if (get_user(hdr_len, &min_hdr->hdr_len))
return -EFAULT;
if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found"); btf_verifier_log(env, "btf_header not found");
return -EINVAL; return -EINVAL;
} }
btf_verifier_log_hdr(env); err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
if (err) {
if (err == -E2BIG)
btf_verifier_log(env, "Unsupported btf_header");
return err;
}
hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
return -EFAULT;
hdr = &btf->hdr;
btf_verifier_log_hdr(env, btf_data_size);
hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) { if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic"); btf_verifier_log(env, "Invalid magic");
return -EINVAL; return -EINVAL;
...@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env) ...@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP; return -ENOTSUPP;
} }
-	meta_left = btf->data_size - sizeof(*hdr);
-	if (!meta_left) {
+	if (btf_data_size == hdr->hdr_len) {
 		btf_verifier_log(env, "No data");
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
-	    /* Type section must align to 4 bytes */
-	    hdr->type_off & (sizeof(u32) - 1)) {
-		btf_verifier_log(env, "Invalid type_off");
-		return -EINVAL;
-	}
-
-	if (meta_left < hdr->str_off ||
-	    meta_left - hdr->str_off < hdr->str_len) {
-		btf_verifier_log(env, "Invalid str_off or str_len");
-		return -EINVAL;
-	}
-
-	btf->nohdr_data = btf->hdr + 1;
+	err = btf_check_sec_info(env, btf_data_size);
+	if (err)
+		return err;
 
 	return 0;
} }
...@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM; err = -ENOMEM;
goto errout; goto errout;
} }
env->btf = btf;
err = btf_parse_hdr(env, btf_data, btf_data_size);
if (err)
goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) { if (!data) {
...@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data; btf->data = data;
btf->data_size = btf_data_size; btf->data_size = btf_data_size;
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) { if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT; err = -EFAULT;
goto errout; goto errout;
} }
env->btf = btf;
err = btf_parse_hdr(env);
if (err)
goto errout;
err = btf_parse_str_sec(env); err = btf_parse_str_sec(env);
if (err) if (err)
goto errout; goto errout;
...@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ...@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err) if (err)
goto errout; goto errout;
-	if (!err && log->level && bpf_verifier_log_full(log)) {
+	if (log->level && bpf_verifier_log_full(log)) {
 		err = -ENOSPC;
 		goto errout;
 	}
 
-	if (!err) {
-		btf_verifier_env_free(env);
-		refcount_set(&btf->refcnt, 1);
-		return btf;
-	}
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
 
 errout:
btf_verifier_env_free(env); btf_verifier_env_free(env);
......
...@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, ...@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf); err = __ptr_ring_produce(q, xdpf);
if (err) { if (err) {
drops++; drops++;
xdp_return_frame(xdpf); xdp_return_frame_rx_napi(xdpf);
} }
processed++; processed++;
} }
......
...@@ -48,15 +48,25 @@ ...@@ -48,15 +48,25 @@
* calls will fail at this point. * calls will fail at this point.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \ #define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct net_device *dev_rx;
unsigned int count;
};
struct bpf_dtab_netdev { struct bpf_dtab_netdev {
-	struct net_device *dev;
+	struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
unsigned int bit; unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) ...@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap); __set_bit(bit, bitmap);
} }
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
return 0;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
prefetch(xdpf);
}
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
if (sent < 0) {
err = sent;
sent = 0;
goto error;
}
drops = bq->count - sent;
out:
bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
	 * xmit'ed and it's our responsibility to free them all.
*/
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll() * from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled * routine is called either from busy_poll context or net_rx_action signaled
...@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) { for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev; struct net_device *netdev;
/* This is possible if the dev entry is removed by user space /* This is possible if the dev entry is removed by user space
...@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue; continue;
__clear_bit(bit, bitmap); __clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev; netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush)) if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev); netdev->netdev_ops->ndo_xdp_flush(netdev);
...@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map) ...@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here a dev_put wont happen until after reading the * update happens in parallel here a dev_put wont happen until after reading the
* ifindex. * ifindex.
*/ */
-struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_dtab_netdev *dev;
+	struct bpf_dtab_netdev *obj;
 
 	if (key >= map->max_entries)
 		return NULL;
 
-	dev = READ_ONCE(dtab->netdev_map[key]);
-	return dev ? dev->dev : NULL;
+	obj = READ_ONCE(dtab->netdev_map[key]);
+	return obj;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
/* Ingress dev_rx will be the same for all xdp_frame's in
	 * bulk_queue, because bq is stored per-CPU and must be flushed
	 * from the net_device driver's NAPI handler before it returns.
*/
if (!bq->dev_rx)
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx);
} }
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
+	struct net_device *dev = obj ? obj->dev : NULL;
 
 	return dev ? &dev->ifindex : NULL;
} }
...@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) ...@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{ {
if (dev->dev->netdev_ops->ndo_xdp_flush) { if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev; struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap; unsigned long *bitmap;
int cpu; int cpu;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap); __clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);
fl->netdev_ops->ndo_xdp_flush(dev->dev); fl->netdev_ops->ndo_xdp_flush(dev->dev);
} }
} }
...@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu) ...@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu); dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev); dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{ {
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns; struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev; struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key; u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value; u32 ifindex = *(u32 *)value;
...@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) { if (!ifindex) {
dev = NULL; dev = NULL;
} else { } else {
-		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-				   map->numa_node);
+		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev) if (!dev)
return -ENOMEM; return -ENOMEM;
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
dev->dev = dev_get_by_index(net, ifindex); dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) { if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev); kfree(dev);
return -EINVAL; return -EINVAL;
} }
...@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = { ...@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void) static int __init dev_map_init(void)
{ {
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier); register_netdevice_notifier(&dev_map_notifier);
return 0; return 0;
} }
......
...@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk, ...@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
} }
bpf_compute_data_pointers_sg(md); bpf_compute_data_pointers_sg(md);
md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi); rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes; psock->apply_bytes = md->apply_bytes;
...@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, ...@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct smap_psock_map_entry *e = NULL; struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock; struct smap_psock *psock;
bool new = false; bool new = false;
-	int err;
+	int err = 0;
/* 1. If sock map has BPF programs those will be inherited by the /* 1. If sock map has BPF programs those will be inherited by the
* sock being added. If the sock is already attached to BPF programs * sock being added. If the sock is already attached to BPF programs
...@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, ...@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock); write_unlock_bh(&sock->sk_callback_lock);
return err; return err;
out_free: out_free:
kfree(e);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
out_progs: out_progs:
if (parse && verdict) { if (parse && verdict) {
......
...@@ -18,7 +18,9 @@ ...@@ -18,7 +18,9 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mmzone.h> #include <linux/mmzone.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h> #include <linux/license.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/version.h> #include <linux/version.h>
...@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = { ...@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is * copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits. * meant to be a future-proofing of bits.
*/ */
-static int check_uarg_tail_zero(void __user *uaddr,
-				size_t expected_size,
-				size_t actual_size)
+int bpf_check_uarg_tail_zero(void __user *uaddr,
+			     size_t expected_size,
+			     size_t actual_size)
{ {
unsigned char __user *addr; unsigned char __user *addr;
unsigned char __user *end; unsigned char __user *end;
...@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) ...@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0; return 0;
} }
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */ /* called via syscall */
static int map_create(union bpf_attr *attr) static int map_create(union bpf_attr *attr)
{ {
...@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr) ...@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1); atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) && if (bpf_map_support_seq_show(map) &&
(attr->btf_key_id || attr->btf_value_id)) { (attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf; struct btf *btf;
if (!attr->btf_key_id || !attr->btf_value_id) { if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL; err = -EINVAL;
goto free_map_nouncharge; goto free_map_nouncharge;
} }
...@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr) ...@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge; goto free_map_nouncharge;
} }
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
-					      attr->btf_value_id);
+		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
+					      attr->btf_value_type_id);
if (err) { if (err) {
btf_put(btf); btf_put(btf);
goto free_map_nouncharge; goto free_map_nouncharge;
} }
map->btf = btf; map->btf = btf;
-		map->btf_key_id = attr->btf_key_id;
-		map->btf_value_id = attr->btf_value_id;
+		map->btf_key_type_id = attr->btf_key_type_id;
+		map->btf_value_type_id = attr->btf_value_type_id;
} }
err = security_bpf_map_alloc(map); err = security_bpf_map_alloc(map);
...@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen; u32 ulen;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err) if (err)
return err; return err;
info_len = min_t(u32, sizeof(info), info_len); info_len = min_t(u32, sizeof(info), info_len);
...@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) { if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0; info.jited_prog_len = 0;
info.xlated_prog_len = 0; info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
goto done; goto done;
} }
...@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload. * for offload.
*/ */
ulen = info.jited_prog_len; ulen = info.jited_prog_len;
-	info.jited_prog_len = prog->jited_len;
+	if (prog->aux->func_cnt) {
u32 i;
info.jited_prog_len = 0;
for (i = 0; i < prog->aux->func_cnt; i++)
info.jited_prog_len += prog->aux->func[i]->jited_len;
} else {
info.jited_prog_len = prog->jited_len;
}
if (info.jited_prog_len && ulen) { if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) { if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns); uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen); ulen = min_t(u32, info.jited_prog_len, ulen);
-			if (copy_to_user(uinsns, prog->bpf_func, ulen))
-				return -EFAULT;
+
+			/* for multi-function programs, copy the JITed
* instructions for all the functions
*/
if (prog->aux->func_cnt) {
u32 len, free, i;
u8 *img;
free = ulen;
for (i = 0; i < prog->aux->func_cnt; i++) {
len = prog->aux->func[i]->jited_len;
len = min_t(u32, len, free);
img = (u8 *) prog->aux->func[i]->bpf_func;
if (copy_to_user(uinsns, img, len))
return -EFAULT;
uinsns += len;
free -= len;
if (!free)
break;
}
} else {
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
}
} else { } else {
info.jited_prog_insns = 0; info.jited_prog_insns = 0;
} }
} }
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt;
if (info.nr_jited_ksyms && ulen) {
if (bpf_dump_raw_ok()) {
u64 __user *user_ksyms;
ulong ksym_addr;
u32 i;
/* copy the address of the kernel symbol
* corresponding to each function
*/
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
for (i = 0; i < ulen; i++) {
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
ksym_addr &= PAGE_MASK;
if (put_user((u64) ksym_addr, &user_ksyms[i]))
return -EFAULT;
}
} else {
info.jited_ksyms = 0;
}
}
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt;
if (info.nr_jited_func_lens && ulen) {
if (bpf_dump_raw_ok()) {
u32 __user *user_lens;
u32 func_len, i;
/* copy the JITed image lengths for each function */
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
user_lens = u64_to_user_ptr(info.jited_func_lens);
for (i = 0; i < ulen; i++) {
func_len = prog->aux->func[i]->jited_len;
if (put_user(func_len, &user_lens[i]))
return -EFAULT;
}
} else {
info.jited_func_lens = 0;
}
}
done: done:
if (copy_to_user(uinfo, &info, info_len) || if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len)) put_user(info_len, &uattr->info.info_len))
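The nr_jited_ksyms and nr_jited_func_lens fields above give user space per-function visibility into multi-function programs. A hedged user-space sketch of reading them follows; it assumes a libbpf-style bpf_obj_get_info_by_fd() wrapper and a privileged caller (otherwise the kernel zeroes the JIT fields as shown earlier), and the fixed buffer sizes are purely illustrative.

/* Sketch (user space): dump per-function JIT symbol addresses and image
 * lengths for a program that uses bpf-to-bpf calls.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>              /* bpf_obj_get_info_by_fd(), assumed libbpf */

static void dump_jited_funcs(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	__u64 ksyms[64];              /* illustrative fixed sizes */
	__u32 lens[64], i;

	info.nr_jited_ksyms = 64;
	info.jited_ksyms = (__u64)(unsigned long)ksyms;
	info.nr_jited_func_lens = 64;
	info.jited_func_lens = (__u64)(unsigned long)lens;

	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
		return;

	for (i = 0; i < info.nr_jited_ksyms && i < 64; i++)
		printf("func %u: ksym 0x%llx len %u\n",
		       i, (unsigned long long)ksyms[i], lens[i]);
}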
...@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, ...@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len; u32 info_len = attr->info.info_len;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err) if (err)
return err; return err;
info_len = min_t(u32, sizeof(info), info_len); info_len = min_t(u32, sizeof(info), info_len);
...@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, ...@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) { if (map->btf) {
info.btf_id = btf_id(map->btf); info.btf_id = btf_id(map->btf);
info.btf_key_id = map->btf_key_id; info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_id = map->btf_value_id; info.btf_value_type_id = map->btf_value_type_id;
} }
if (bpf_map_is_dev_bound(map)) { if (bpf_map_is_dev_bound(map)) {
...@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, ...@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len; u32 info_len = attr->info.info_len;
int err; int err;
err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err) if (err)
return err; return err;
...@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) ...@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id); return btf_get_fd_by_id(attr->btf_id);
} }
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
union bpf_attr __user *uattr,
u32 prog_id, u32 fd_type,
const char *buf, u64 probe_offset,
u64 probe_addr)
{
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
u32 len = buf ? strlen(buf) : 0, input_len;
int err = 0;
if (put_user(len, &uattr->task_fd_query.buf_len))
return -EFAULT;
input_len = attr->task_fd_query.buf_len;
if (input_len && ubuf) {
if (!len) {
/* nothing to copy, just make ubuf NULL terminated */
char zero = '\0';
if (put_user(zero, ubuf))
return -EFAULT;
} else if (input_len >= len + 1) {
/* ubuf can hold the string with NULL terminator */
if (copy_to_user(ubuf, buf, len + 1))
return -EFAULT;
} else {
/* ubuf cannot hold the string with NULL terminator,
* do a partial copy with NULL terminator.
*/
char zero = '\0';
err = -ENOSPC;
if (copy_to_user(ubuf, buf, input_len - 1))
return -EFAULT;
if (put_user(zero, ubuf + input_len - 1))
return -EFAULT;
}
}
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
return -EFAULT;
return err;
}
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
static int bpf_task_fd_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (attr->task_fd_query.flags != 0)
return -EINVAL;
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
if (!files)
return -ENOENT;
err = 0;
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
err = -EBADF;
else
get_file(file);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
event = perf_get_event(file);
if (!IS_ERR(event)) {
u64 probe_offset, probe_addr;
u32 prog_id, fd_type;
const char *buf;
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
&probe_addr);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
probe_offset,
probe_addr);
goto put_file;
}
err = -ENOTSUPP;
put_file:
fput(file);
out:
return err;
}
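As a usage illustration, here is a hedged sketch of driving the new command straight through the bpf(2) syscall; the attribute fields mirror the task_fd_query block handled above, while the buffer size and error handling are illustrative only.

/* Sketch (user space): ask which BPF program and probe a given
 * perf_event or raw_tracepoint fd in a target task refers to.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int query_task_fd(pid_t pid, int fd)
{
	union bpf_attr attr;
	char buf[256];

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf_len = sizeof(buf);
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;

	if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)) < 0)
		return -1;

	printf("prog_id %u fd_type %u name %s offset 0x%llx addr 0x%llx\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long)attr.task_fd_query.probe_offset,
	       (unsigned long long)attr.task_fd_query.probe_addr);
	return 0;
}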
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{ {
union bpf_attr attr = {}; union bpf_attr attr = {};
...@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz ...@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size); err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err) if (err)
return err; return err;
size = min_t(u32, size, sizeof(attr)); size = min_t(u32, size, sizeof(attr));
...@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz ...@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID: case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr); err = bpf_btf_get_fd_by_id(&attr);
break; break;
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
default: default:
err = -EINVAL; err = -EINVAL;
break; break;
......
...@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, ...@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
switch (env->prog->type) { switch (env->prog->type) {
case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
/* dst_input() and dst_output() can't write for now */ /* dst_input() and dst_output() can't write for now */
if (t == BPF_WRITE) if (t == BPF_WRITE)
return false; return false;
...@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL) insn->src_reg != BPF_PSEUDO_CALL)
continue; continue;
subprog = insn->off; subprog = insn->off;
insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func - func[subprog]->bpf_func -
__bpf_call_base; __bpf_call_base;
} }
/* we use the aux data to keep a list of the start addresses
* of the JITed images for each function in the program
*
* for some architectures, such as powerpc64, the imm field
* might not be large enough to hold the offset of the start
* address of the callee's JITed image from __bpf_call_base
*
* in such cases, we can look up the start address of a callee
* by using its subprog id, available from the off field of
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
func[i]->aux->func_cnt = env->subprog_cnt;
} }
for (i = 0; i < env->subprog_cnt; i++) { for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func; old_bpf_func = func[i]->bpf_func;
...@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only. * later look the same as if they were interpreted only.
*/ */
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
unsigned long addr;
if (insn->code != (BPF_JMP | BPF_CALL) || if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL) insn->src_reg != BPF_PSEUDO_CALL)
continue; continue;
insn->off = env->insn_aux_data[i].call_imm; insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1); subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog]->bpf_func; insn->imm = subprog;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
} }
prog->jited = 1; prog->jited = 1;
......
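As rough orientation for how an arch JIT consumes the list set up in the hunk above, here is a hedged sketch; it assumes, per the comment above, that the call instruction's off field still carries the subprog index while the subprogs are being JITed, and it is not the exact powerpc64 code.

/* Sketch (kernel side): resolve a bpf-to-bpf callee's absolute address
 * from fp->aux->func[] when the relative imm encoding may not fit.
 */
static u64 bpf_subprog_call_addr(const struct bpf_prog *fp,
				 const struct bpf_insn *insn)
{
	if (fp->aux->func && fp->aux->func_cnt)
		/* off indexes the list of JITed subprog images */
		return (u64)(unsigned long)fp->aux->func[insn->off]->bpf_func;

	/* single-function case: imm is relative to __bpf_call_base */
	return (u64)(unsigned long)__bpf_call_base + insn->imm;
}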
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets /* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
......
...@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd) ...@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
return file; return file;
} }
const struct perf_event *perf_get_event(struct file *file)
{
if (file->f_op != &perf_fops)
return ERR_PTR(-EINVAL);
return file->private_data;
}
const struct perf_event_attr *perf_event_attrs(struct perf_event *event) const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{ {
if (!event) if (!event)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h> #include <linux/error-injection.h>
#include "trace_probe.h" #include "trace_probe.h"
...@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) ...@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
mutex_unlock(&bpf_event_mutex); mutex_unlock(&bpf_event_mutex);
return err; return err;
} }
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr)
{
bool is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
int flags, err = 0;
prog = event->prog;
if (!prog)
return -ENOENT;
/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
return -EOPNOTSUPP;
*prog_id = prog->aux->id;
flags = event->tp_event->flags;
is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
*fd_type = BPF_FD_TYPE_TRACEPOINT;
*probe_offset = 0x0;
*probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
if (flags & TRACE_EVENT_FL_KPROBE)
err = bpf_get_kprobe_info(event, fd_type, buf,
probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
probe_offset,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
return err;
}
...@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, ...@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
head, NULL); head, NULL);
} }
NOKPROBE_SYMBOL(kretprobe_perf_func); NOKPROBE_SYMBOL(kretprobe_perf_func);
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
const char **symbol, u64 *probe_offset,
u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_kprobe *tk;
if (perf_type_tracepoint)
tk = find_trace_kprobe(pevent, group);
else
tk = event->tp_event->data;
if (!tk)
return -EINVAL;
*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
: BPF_FD_TYPE_KPROBE;
if (tk->symbol) {
*symbol = tk->symbol;
*probe_offset = tk->rp.kp.offset;
*probe_addr = 0;
} else {
*symbol = NULL;
*probe_offset = 0;
*probe_addr = (unsigned long)tk->rp.kp.addr;
}
return 0;
}
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
/* /*
......
...@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, ...@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{ {
__uprobe_perf_func(tu, func, regs, ucb, dsize); __uprobe_perf_func(tu, func, regs, ucb, dsize);
} }
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_uprobe *tu;
if (perf_type_tracepoint)
tu = find_probe_event(pevent, group);
else
tu = event->tp_event->data;
if (!tu)
return -EINVAL;
*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
static int static int
......
...@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, ...@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
} }
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
static void xdp_return(void *data, struct xdp_mem_info *mem) /* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
* is used for those call sites, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{ {
struct xdp_mem_allocator *xa; struct xdp_mem_allocator *xa;
struct page *page; struct page *page;
...@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem) ...@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data); page = virt_to_head_page(data);
if (xa) if (xa)
page_pool_put_page(xa->page_pool, page); page_pool_put_page(xa->page_pool, page, napi_direct);
else else
put_page(page); put_page(page);
rcu_read_unlock(); rcu_read_unlock();
...@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem) ...@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
void xdp_return_frame(struct xdp_frame *xdpf) void xdp_return_frame(struct xdp_frame *xdpf)
{ {
xdp_return(xdpf->data, &xdpf->mem); __xdp_return(xdpf->data, &xdpf->mem, false);
} }
EXPORT_SYMBOL_GPL(xdp_return_frame); EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
__xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
void xdp_return_buff(struct xdp_buff *xdp) void xdp_return_buff(struct xdp_buff *xdp)
{ {
xdp_return(xdp->data, &xdp->rxq->mem); __xdp_return(xdp->data, &xdp->rxq->mem, true);
} }
EXPORT_SYMBOL_GPL(xdp_return_buff); EXPORT_SYMBOL_GPL(xdp_return_buff);
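A hedged sketch of the intended split between the two return variants, e.g. in a driver's XDP transmit path (the dummy_* names are hypothetical):

/* Sketch: when a driver frees an xdp_frame from its own NAPI poll/xmit
 * context it may take the faster _rx_napi recycle path; anywhere else
 * it must use xdp_return_frame().
 */
static int dummy_xmit_one(struct dummy_tx_ring *ring, struct xdp_frame *xdpf)
{
	if (dummy_ring_full(ring)) {		/* hypothetical helper */
		xdp_return_frame_rx_napi(xdpf);	/* running under NAPI here */
		return -ENOSPC;
	}
	return dummy_ring_enqueue(ring, xdpf);	/* hypothetical helper */
}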
...@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) ...@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL; return NULL;
} }
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*/
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
struct fib_info *fi = res->fi;
struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
struct net_device *dev = nh->nh_dev;
u32 mtu = 0;
if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
if (likely(!mtu)) {
struct fib_nh_exception *fnhe;
fnhe = find_exception(nh, daddr);
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
mtu = fnhe->fnhe_pmtu;
}
if (likely(!mtu))
mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
}
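A small, hedged sketch of how a caller that has already performed a fib lookup might consult the new helper (the wrapper itself is illustrative):

/* Sketch: check a packet against the MTU selected by the rules above.
 * Assumes the usual rcu_read_lock() held around the fib lookup.
 */
static bool pkt_fits_route_mtu(struct fib_result *res, __be32 daddr,
			       u32 pkt_len)
{
	return pkt_len <= ip_mtu_from_fib_result(res, daddr);
}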
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr, const bool do_cache) __be32 daddr, const bool do_cache)
{ {
......
...@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC ...@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC
If unsure, say N. If unsure, say N.
config IPV6_SEG6_BPF
def_bool y
depends on IPV6_SEG6_LWTUNNEL
depends on IPV6 = y
endif # IPV6 endif # IPV6
...@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i, ...@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
return f6i; return f6i;
} }
static u32
eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
return 0;
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.fib6_get_table = eafnosupport_fib6_get_table, .fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup, .fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select, .fib6_multipath_select = eafnosupport_fib6_multipath_select,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
}; };
EXPORT_SYMBOL_GPL(ipv6_stub); EXPORT_SYMBOL_GPL(ipv6_stub);
......
...@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = { ...@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_table_lookup = fib6_table_lookup, .fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup, .fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select, .fib6_multipath_select = fib6_multipath_select,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.udpv6_encap_enable = udpv6_encap_enable, .udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na, .ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl, .nd_tbl = &nd_tbl,
......
...@@ -2604,6 +2604,54 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) ...@@ -2604,6 +2604,54 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu); return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
} }
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
mtu = f6i->fib6_pmtu;
if (mtu)
goto out;
}
src_key = NULL;
#ifdef CONFIG_IPV6_SUBTREES
if (f6i->fib6_src.plen)
src_key = saddr;
#endif
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
struct net_device *dev = fib6_info_nh_dev(f6i);
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
if (idev && idev->cnf.mtu6 > mtu)
mtu = idev->cnf.mtu6;
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct flowi6 *fl6) struct flowi6 *fl6)
{ {
......
/* /*
* SR-IPv6 implementation * SR-IPv6 implementation
* *
* Author: * Authors:
* David Lebrun <david.lebrun@uclouvain.be> * David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
* *
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
...@@ -30,7 +31,9 @@ ...@@ -30,7 +31,9 @@
#ifdef CONFIG_IPV6_SEG6_HMAC #ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h> #include <net/seg6_hmac.h>
#endif #endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/bpf.h>
struct seg6_local_lwt; struct seg6_local_lwt;
...@@ -41,6 +44,11 @@ struct seg6_action_desc { ...@@ -41,6 +44,11 @@ struct seg6_action_desc {
int static_headroom; int static_headroom;
}; };
struct bpf_lwt_prog {
struct bpf_prog *prog;
char *name;
};
struct seg6_local_lwt { struct seg6_local_lwt {
int action; int action;
struct ipv6_sr_hdr *srh; struct ipv6_sr_hdr *srh;
...@@ -49,6 +57,7 @@ struct seg6_local_lwt { ...@@ -49,6 +57,7 @@ struct seg6_local_lwt {
struct in6_addr nh6; struct in6_addr nh6;
int iif; int iif;
int oif; int oif;
struct bpf_lwt_prog bpf;
int headroom; int headroom;
struct seg6_action_desc *desc; struct seg6_action_desc *desc;
...@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) ...@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr; *daddr = *addr;
} }
static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id) u32 tbl_id)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb); struct ipv6hdr *hdr = ipv6_hdr(skb);
...@@ -187,6 +196,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, ...@@ -187,6 +196,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
skb_dst_drop(skb); skb_dst_drop(skb);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
return dst->error;
} }
/* regular endpoint function */ /* regular endpoint function */
...@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, &slwt->nh6, 0); seg6_lookup_nexthop(skb, &slwt->nh6, 0);
return dst_input(skb); return dst_input(skb);
...@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr); advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, slwt->table); seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb); return dst_input(skb);
...@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb, ...@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6)) if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6; nhaddr = &slwt->nh6;
lookup_nexthop(skb, nhaddr, 0); seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb); return dst_input(skb);
drop: drop:
...@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb, ...@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop; goto drop;
lookup_nexthop(skb, NULL, slwt->table); seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb); return dst_input(skb);
...@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) ...@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb, ...@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0); seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb); return dst_input(skb);
...@@ -447,6 +457,71 @@ static int input_action_end_b6_encap(struct sk_buff *skb, ...@@ -447,6 +457,71 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
return err; return err;
} }
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct seg6_bpf_srh_state local_srh_state;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int ret;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
* which is also accessed by the bpf_lwt_seg6_* helpers
*/
preempt_disable();
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = 1;
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
rcu_read_unlock();
local_srh_state = *srh_state;
preempt_enable();
switch (ret) {
case BPF_OK:
case BPF_REDIRECT:
break;
case BPF_DROP:
goto drop;
default:
pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
goto drop;
}
if (unlikely((local_srh_state.hdrlen & 7) != 0))
goto drop;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
goto drop;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
if (!local_srh_state.valid &&
unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
goto drop;
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
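For orientation, a minimal hedged sketch of a program that could sit behind this hook; it only returns BPF_OK, the includes mirror the in-tree sample conventions seen later in this series, and the section name is a loader convention rather than something the kernel mandates.

/* Sketch: a do-nothing End.BPF program. input_action_end_bpf() above runs
 * it per packet and accepts BPF_OK, BPF_DROP or BPF_REDIRECT as results.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int noop_end_bpf(struct __sk_buff *skb)
{
	return BPF_OK;
}

char _license[] SEC("license") = "GPL";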
static struct seg6_action_desc seg6_action_table[] = { static struct seg6_action_desc seg6_action_table[] = {
{ {
.action = SEG6_LOCAL_ACTION_END, .action = SEG6_LOCAL_ACTION_END,
...@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = { ...@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_SRH), .attrs = (1 << SEG6_LOCAL_SRH),
.input = input_action_end_b6_encap, .input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr), .static_headroom = sizeof(struct ipv6hdr),
} },
{
.action = SEG6_LOCAL_ACTION_END_BPF,
.attrs = (1 << SEG6_LOCAL_BPF),
.input = input_action_end_bpf,
},
}; };
static struct seg6_action_desc *__get_action_desc(int action) static struct seg6_action_desc *__get_action_desc(int action)
...@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { ...@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
.len = sizeof(struct in6_addr) }, .len = sizeof(struct in6_addr) },
[SEG6_LOCAL_IIF] = { .type = NLA_U32 }, [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 }, [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
}; };
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
...@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) ...@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0; return 0;
} }
#define MAX_PROG_NAME 256
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
[SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
.len = MAX_PROG_NAME },
};
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
struct bpf_prog *p;
int ret;
u32 fd;
ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
if (ret < 0)
return ret;
if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
return -EINVAL;
slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
if (!slwt->bpf.name)
return -ENOMEM;
fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
if (IS_ERR(p)) {
kfree(slwt->bpf.name);
return PTR_ERR(p);
}
slwt->bpf.prog = p;
return 0;
}
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct nlattr *nest;
if (!slwt->bpf.prog)
return 0;
nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
if (!nest)
return -EMSGSIZE;
if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
return -EMSGSIZE;
if (slwt->bpf.name &&
nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
return -EMSGSIZE;
return nla_nest_end(skb, nest);
}
static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
if (!a->bpf.name && !b->bpf.name)
return 0;
if (!a->bpf.name || !b->bpf.name)
return 1;
return strcmp(a->bpf.name, b->bpf.name);
}
struct seg6_action_param { struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
...@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { ...@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
.put = put_nla_oif, .put = put_nla_oif,
.cmp = cmp_nla_oif }, .cmp = cmp_nla_oif },
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
.cmp = cmp_nla_bpf },
}; };
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
...@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt) ...@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
kfree(slwt->srh); kfree(slwt->srh);
if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
kfree(slwt->bpf.name);
bpf_prog_put(slwt->bpf.prog);
}
return;
} }
static int seg6_local_fill_encap(struct sk_buff *skb, static int seg6_local_fill_encap(struct sk_buff *skb,
...@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) ...@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
if (attrs & (1 << SEG6_LOCAL_OIF)) if (attrs & (1 << SEG6_LOCAL_OIF))
nlsize += nla_total_size(4); nlsize += nla_total_size(4);
if (attrs & (1 << SEG6_LOCAL_BPF))
nlsize += nla_total_size(sizeof(struct nlattr)) +
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
return nlsize; return nlsize;
} }
......
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -25,39 +16,25 @@ ...@@ -25,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048 #define XDP_UMEM_MIN_FRAME_SIZE 2048
int xdp_umem_create(struct xdp_umem **umem)
{
*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
if (!(*umem))
return -ENOMEM;
return 0;
}
static void xdp_umem_unpin_pages(struct xdp_umem *umem) static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{ {
unsigned int i; unsigned int i;
if (umem->pgs) { for (i = 0; i < umem->npgs; i++) {
for (i = 0; i < umem->npgs; i++) { struct page *page = umem->pgs[i];
struct page *page = umem->pgs[i];
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs); set_page_dirty_lock(page);
umem->pgs = NULL; put_page(page);
} }
kfree(umem->pgs);
umem->pgs = NULL;
} }
static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{ {
if (umem->user) { atomic_long_sub(umem->npgs, &umem->user->locked_vm);
atomic_long_sub(umem->npgs, &umem->user->locked_vm); free_uid(umem->user);
free_uid(umem->user);
}
} }
static void xdp_umem_release(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem)
...@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem) ...@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL; umem->cq = NULL;
} }
if (umem->pgs) { xdp_umem_unpin_pages(umem);
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm); task = get_pid_task(umem->pid, PIDTYPE_PID);
umem->pgs = NULL; put_pid(umem->pid);
} if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
xdp_umem_unaccount_pages(umem); xdp_umem_unaccount_pages(umem);
out: out:
kfree(umem); kfree(umem);
...@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work) ...@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem) void xdp_get_umem(struct xdp_umem *umem)
{ {
atomic_inc(&umem->users); refcount_inc(&umem->users);
} }
void xdp_put_umem(struct xdp_umem *umem) void xdp_put_umem(struct xdp_umem *umem)
...@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem) ...@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem) if (!umem)
return; return;
if (atomic_dec_and_test(&umem->users)) { if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred); INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work); schedule_work(&umem->work);
} }
...@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) ...@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0; return 0;
} }
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{ {
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom; u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len; u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp; unsigned int nframes, nfpp;
int size_chk, err; int size_chk, err;
if (!umem)
return -EINVAL;
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) { if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if: /* Strictly speaking we could support this, if:
* - huge pages, or* * - huge pages, or*
...@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size); umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1; umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp); umem->nfpplog2 = ilog2(nfpp);
atomic_set(&umem->users, 1); refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem); err = xdp_umem_account_pages(umem);
if (err) if (err)
...@@ -254,7 +224,25 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) ...@@ -254,7 +224,25 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return err; return err;
} }
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
struct xdp_umem *umem;
int err;
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
err = xdp_umem_reg(umem, mr);
if (err) {
kfree(umem);
return ERR_PTR(err);
}
return umem;
}
bool xdp_umem_validate_queues(struct xdp_umem *umem) bool xdp_umem_validate_queues(struct xdp_umem *umem)
{ {
return (umem->fq && umem->cq); return umem->fq && umem->cq;
} }
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef XDP_UMEM_H_ #ifndef XDP_UMEM_H_
...@@ -36,7 +27,7 @@ struct xdp_umem { ...@@ -36,7 +27,7 @@ struct xdp_umem {
struct pid *pid; struct pid *pid;
unsigned long address; unsigned long address;
size_t size; size_t size;
atomic_t users; refcount_t users;
struct work_struct work; struct work_struct work;
}; };
...@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem, ...@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
} }
bool xdp_umem_validate_queues(struct xdp_umem *umem); bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem); void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem);
int xdp_umem_create(struct xdp_umem **umem); struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */ #endif /* XDP_UMEM_H_ */
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space packet buffer /* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef XDP_UMEM_PROPS_H_ #ifndef XDP_UMEM_PROPS_H_
......
...@@ -5,15 +5,6 @@ ...@@ -5,15 +5,6 @@
* applications. * applications.
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
* *
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com> * Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com> * Magnus Karlsson <magnus.karlsson@intel.com>
*/ */
...@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, ...@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out; goto out;
} }
if (xs->queue_id >= xs->dev->real_num_tx_queues) {
err = -ENXIO;
goto out;
}
skb = sock_alloc_send_skb(sk, len, !need_wait, &err); skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) { if (unlikely(!skb)) {
err = -EAGAIN; err = -EAGAIN;
...@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, ...@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q) if (!q)
return -ENOMEM; return -ENOMEM;
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
*queue = q; *queue = q;
return 0; return 0;
} }
static void __xsk_release(struct xdp_sock *xs)
{
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
}
static int xsk_release(struct socket *sock) static int xsk_release(struct socket *sock)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
...@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock) ...@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable(); local_bh_enable();
if (xs->dev) { if (xs->dev) {
__xsk_release(xs); /* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
xs->dev = NULL; xs->dev = NULL;
} }
...@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{ {
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk); struct xdp_sock *xs = xdp_sk(sk);
struct xdp_umem *old_umem = NULL; struct net_device *dev;
int err = 0; int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp)) if (addr_len < sizeof(struct sockaddr_xdp))
...@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL; return -EINVAL;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
dev_curr = xs->dev; if (xs->dev) {
err = -EBUSY;
goto out_release;
}
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) { if (!dev) {
err = -ENODEV; err = -ENODEV;
...@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock; goto out_unlock;
} }
if (sxdp->sxdp_queue_id >= dev->num_rx_queues) { if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
(xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL; err = -EINVAL;
goto out_unlock; goto out_unlock;
} }
...@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
} }
xdp_get_umem(umem_xs->umem); xdp_get_umem(umem_xs->umem);
old_umem = xs->umem;
xs->umem = umem_xs->umem; xs->umem = umem_xs->umem;
sockfd_put(sock); sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
...@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props); xskq_set_umem(xs->umem->cq, &xs->umem->props);
} }
/* Rebind? */
if (dev_curr && (dev_curr != dev ||
xs->queue_id != sxdp->sxdp_queue_id)) {
__xsk_release(xs);
if (old_umem)
xdp_put_umem(old_umem);
}
xs->dev = dev; xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id; xs->queue_id = sxdp->sxdp_queue_id;
...@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ...@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr; struct xdp_umem_reg mr;
struct xdp_umem *umem; struct xdp_umem *umem;
if (xs->umem)
return -EBUSY;
if (copy_from_user(&mr, optval, sizeof(mr))) if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT; return -EFAULT;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
err = xdp_umem_create(&umem); if (xs->umem) {
mutex_unlock(&xs->mutex);
return -EBUSY;
}
err = xdp_umem_reg(umem, &mr); umem = xdp_umem_create(&mr);
if (err) { if (IS_ERR(umem)) {
kfree(umem);
mutex_unlock(&xs->mutex); mutex_unlock(&xs->mutex);
return err; return PTR_ERR(umem);
} }
/* Make sure umem is ready before it can be seen by others */ /* Make sure umem is ready before it can be seen by others */
smp_wmb(); smp_wmb();
xs->umem = umem; xs->umem = umem;
mutex_unlock(&xs->mutex); mutex_unlock(&xs->mutex);
return 0; return 0;
...@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, ...@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q; struct xsk_queue **q;
int entries; int entries;
if (!xs->umem)
return -EINVAL;
if (copy_from_user(&entries, optval, sizeof(entries))) if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT; return -EFAULT;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
}
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq; &xs->umem->cq;
err = xsk_init_queue(entries, q, true); err = xsk_init_queue(entries, q, true);
...@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, ...@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0; return 0;
} }
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
if (len < sizeof(off))
return -EINVAL;
off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.fr.desc = offsetof(struct xdp_umem_ring, desc);
off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.cr.desc = offsetof(struct xdp_umem_ring, desc);
len = sizeof(off);
if (copy_to_user(optval, &off, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
default: default:
break; break;
} }
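Because the ring layout is no longer hard-coded in the uapi, user space is expected to query these offsets before mmap()ing a ring. A hedged user-space sketch follows (error handling trimmed; the SOL_XDP fallback define mirrors what samples do when libc headers lag behind):

/* Sketch (user space): fetch the ring offsets, then map the RX ring. */
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>

#ifndef SOL_XDP
#define SOL_XDP 283	/* assumed value when libc headers lack it */
#endif

static void *map_rx_ring(int xsk_fd, unsigned int ndescs,
			 struct xdp_mmap_offsets *off)
{
	socklen_t optlen = sizeof(*off);

	if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
		return MAP_FAILED;

	/* ring size = offset of the descriptor array + the descriptors */
	return mmap(NULL, off->rx.desc + ndescs * sizeof(struct xdp_desc),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    xsk_fd, XDP_PGOFF_RX_RING);
}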
...@@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock, ...@@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk); struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL; struct xsk_queue *q = NULL;
struct xdp_umem *umem;
unsigned long pfn; unsigned long pfn;
struct page *qpg; struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) { if (offset == XDP_PGOFF_RX_RING) {
q = xs->rx; q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) { } else if (offset == XDP_PGOFF_TX_RING) {
q = xs->tx; q = READ_ONCE(xs->tx);
} else { } else {
if (!xs->umem) umem = READ_ONCE(xs->umem);
if (!umem)
return -EINVAL; return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING) if (offset == XDP_UMEM_PGOFF_FILL_RING)
q = xs->umem->fq; q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
q = xs->umem->cq; q = READ_ONCE(umem->cq);
} }
if (!q) if (!q)
...@@ -554,24 +572,24 @@ static struct proto xsk_proto = { ...@@ -554,24 +572,24 @@ static struct proto xsk_proto = {
}; };
static const struct proto_ops xsk_proto_ops = { static const struct proto_ops xsk_proto_ops = {
.family = PF_XDP, .family = PF_XDP,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.release = xsk_release, .release = xsk_release,
.bind = xsk_bind, .bind = xsk_bind,
.connect = sock_no_connect, .connect = sock_no_connect,
.socketpair = sock_no_socketpair, .socketpair = sock_no_socketpair,
.accept = sock_no_accept, .accept = sock_no_accept,
.getname = sock_no_getname, .getname = sock_no_getname,
.poll = xsk_poll, .poll = xsk_poll,
.ioctl = sock_no_ioctl, .ioctl = sock_no_ioctl,
.listen = sock_no_listen, .listen = sock_no_listen,
.shutdown = sock_no_shutdown, .shutdown = sock_no_shutdown,
.setsockopt = xsk_setsockopt, .setsockopt = xsk_setsockopt,
.getsockopt = xsk_getsockopt, .getsockopt = xsk_getsockopt,
.sendmsg = xsk_sendmsg, .sendmsg = xsk_sendmsg,
.recvmsg = sock_no_recvmsg, .recvmsg = sock_no_recvmsg,
.mmap = xsk_mmap, .mmap = xsk_mmap,
.sendpage = sock_no_sendpage, .sendpage = sock_no_sendpage,
}; };
static void xsk_destruct(struct sock *sk) static void xsk_destruct(struct sock *sk)
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure /* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/slab.h> #include <linux/slab.h>
...@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q) ...@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q) static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{ {
return (sizeof(struct xdp_ring) + return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
q->nentries * sizeof(struct xdp_desc));
} }
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
......
/* SPDX-License-Identifier: GPL-2.0 /* SPDX-License-Identifier: GPL-2.0 */
* XDP user-space ring structure /* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation. * Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#ifndef _LINUX_XSK_QUEUE_H #ifndef _LINUX_XSK_QUEUE_H
...@@ -22,6 +13,23 @@ ...@@ -22,6 +13,23 @@
#define RX_BATCH_SIZE 16 #define RX_BATCH_SIZE 16
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
u32 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue { struct xsk_queue {
struct xdp_umem_props umem_props; struct xdp_umem_props umem_props;
u32 ring_mask; u32 ring_mask;
...@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q) ...@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline bool xskq_full_desc(struct xsk_queue *q) static inline bool xskq_full_desc(struct xsk_queue *q)
{ {
return (xskq_nb_avail(q, q->nentries) == q->nentries); return xskq_nb_avail(q, q->nentries) == q->nentries;
} }
static inline bool xskq_empty_desc(struct xsk_queue *q) static inline bool xskq_empty_desc(struct xsk_queue *q)
{ {
return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries); return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
} }
void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
......
...@@ -51,6 +51,7 @@ hostprogs-y += cpustat ...@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock hostprogs-y += xdpsock
hostprogs-y += xdp_fwd hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
# Libbpf dependencies # Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
...@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o ...@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
...@@ -160,6 +162,7 @@ always += cpustat_kern.o ...@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o always += xdpsock_kern.o
always += xdp_fwd_kern.o always += xdp_fwd_kern.o
always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/lib/
...@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ ...@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt HOSTLOADLIBES_tracex4 += -lrt
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
return 0;
}
SEC("kretprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
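This kernel-side sample deliberately does nothing: it only gives the matching task_fd_query_user.c something to attach to kprobe/kretprobe perf events so that the new BPF_TASK_FD_QUERY command can be exercised. Below is a hedged sketch of what such a query looks like from user space, using the bpf_task_fd_query() wrapper added to libbpf in this series; the include path and the surrounding attach logic are assumptions, not an excerpt from the sample.

// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <linux/bpf.h>
#include "bpf/bpf.h"		/* libbpf syscall wrappers; path may differ */

/* Given a perf event fd that a BPF program was attached to, ask the kernel
 * what it is: the attached prog id, the probed symbol name and the offset. */
static void show_attachment(int pid, int fd)
{
	__u64 probe_offset = 0, probe_addr = 0;
	__u32 prog_id = 0, fd_type = 0;
	char buf[256];
	__u32 len = sizeof(buf);

	if (bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
			      &probe_offset, &probe_addr)) {
		perror("bpf_task_fd_query");
		return;
	}

	if (fd_type == BPF_FD_TYPE_KPROBE)
		printf("fd %d: prog %u on kprobe %s+0x%llx\n",
		       fd, prog_id, buf, (unsigned long long)probe_offset);
	else if (fd_type == BPF_FD_TYPE_KRETPROBE)
		printf("fd %d: prog %u on kretprobe %s\n", fd, prog_id, buf);
}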
...@@ -95,7 +95,7 @@ class HeaderParser(object): ...@@ -95,7 +95,7 @@ class HeaderParser(object):
return capture.group(1) return capture.group(1)
def parse_desc(self): def parse_desc(self):
p = re.compile(' \* ?(?:\t| {6,8})Description$') p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line) capture = p.match(self.line)
if not capture: if not capture:
# Helper can have empty description and we might be parsing another # Helper can have empty description and we might be parsing another
...@@ -109,7 +109,7 @@ class HeaderParser(object): ...@@ -109,7 +109,7 @@ class HeaderParser(object):
if self.line == ' *\n': if self.line == ' *\n':
desc += '\n' desc += '\n'
else: else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line) capture = p.match(self.line)
if capture: if capture:
desc += capture.group(1) + '\n' desc += capture.group(1) + '\n'
...@@ -118,7 +118,7 @@ class HeaderParser(object): ...@@ -118,7 +118,7 @@ class HeaderParser(object):
return desc return desc
def parse_ret(self): def parse_ret(self):
p = re.compile(' \* ?(?:\t| {6,8})Return$') p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line) capture = p.match(self.line)
if not capture: if not capture:
# Helper can have empty retval and we might be parsing another # Helper can have empty retval and we might be parsing another
...@@ -132,7 +132,7 @@ class HeaderParser(object): ...@@ -132,7 +132,7 @@ class HeaderParser(object):
if self.line == ' *\n': if self.line == ' *\n':
ret += '\n' ret += '\n'
else: else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)') p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line) capture = p.match(self.line)
if capture: if capture:
ret += capture.group(1) + '\n' ret += capture.group(1) + '\n'
......
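The regex change above relaxes the indentation the parser accepts in front of the Description and Return keywords from six-to-eight spaces to five-to-eight (a tab still works as before). For illustration, here is a schematic helper-doc comment in the layout scripts/bpf_helpers_doc.py scrapes out of include/uapi/linux/bpf.h; the helper itself is made up, only the indentation pattern is what matters.

/* A made-up helper documented in the layout the script expects: the
 * Description/Return keywords are indented by five spaces (at least six
 * were required before this change) and their bodies by a further eight,
 * instead of the usual tabs.
 *
 * int bpf_example_helper(void *ctx, u32 flags)
 *     Description
 *             Do something with *ctx* according to *flags*.
 *     Return
 *             0 on success, or a negative error in case of failure.
 */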
...@@ -16,7 +16,7 @@ SYNOPSIS ...@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version** **bpftool** **version**
*OBJECT* := { **map** | **program** | **cgroup** } *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] } | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
...@@ -30,6 +30,8 @@ SYNOPSIS ...@@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
*PERF-COMMANDS* := { **show** | **list** | **help** }
DESCRIPTION DESCRIPTION
=========== ===========
*bpftool* allows for inspection and simple modification of BPF objects *bpftool* allows for inspection and simple modification of BPF objects
...@@ -56,3 +58,4 @@ OPTIONS ...@@ -56,3 +58,4 @@ OPTIONS
SEE ALSO SEE ALSO
======== ========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
**bpftool-perf**\ (8)