diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0fe98a567125aa7c339f5dfa537d278cda223868..73a5cf18fd84f3553f564456f0e482977afdfbe2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -766,7 +766,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_skb_data(func)) + if (bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -775,7 +775,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_skb_data(func)) { + if (bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bee281f3163d039e2fa203bcdaa02b8bf87d6ced..167b31b186c1313c2cea094ee3b972233d61a608 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -981,7 +981,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT2(0x0d00, REG_14, REG_W1); /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); - if (bpf_helper_changes_skb_data((void *)func)) { + if (bpf_helper_changes_pkt_data((void *)func)) { jit->seen |= SEEN_SKB_CHANGE; /* lg %b1,ST_OFF_SKBP(%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fe04a04dab8ec0df10c827623577102a994df71c..e76d1af60f7ad76a12fa4f01cf61b3c508ae0453 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -853,7 +853,7 @@ xadd: if (is_imm8(insn->off)) func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]); if (seen_ld_abs) { - reload_skb_data = bpf_helper_changes_skb_data(func); + reload_skb_data = bpf_helper_changes_pkt_data(func); if (reload_skb_data) { EMIT1(0x57); /* push %rdi */ jmp_offset += 22; /* pop, mov, sub, mov */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 49a81f1fc1d60082671e4ad462577f7beb01abe9..bcd95533905865d38c80f72ac32e34f8a653a839 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -51,6 +51,9 @@ #include "mlx4_en.h" #include "en_port.h" +#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \ + XDP_PACKET_HEADROOM)) + int mlx4_en_setup_tc(struct net_device *dev, u8 up) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -2249,6 +2252,19 @@ void mlx4_en_destroy_netdev(struct net_device *dev) free_netdev(dev); } +static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (mtu > MLX4_EN_MAX_XDP_MTU) { + en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n", + mtu, MLX4_EN_MAX_XDP_MTU); + return false; + } + + return true; +} + static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -2258,11 +2274,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n", dev->mtu, new_mtu); - if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { - en_err(priv, "MTU size:%d requires frags but XDP running\n", - new_mtu); - return -EOPNOTSUPP; - } + if (priv->tx_ring_num[TX_XDP] && + !mlx4_en_check_xdp_mtu(dev, new_mtu)) + return -ENOTSUPP; + dev->mtu = new_mtu; if (netif_running(dev)) { @@ -2710,10 +2725,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) return 0; } - if (priv->num_frags > 1) { - en_err(priv, "Cannot set XDP if MTU requires multiple frags\n"); + if (!mlx4_en_check_xdp_mtu(dev, dev->mtu)) return -EOPNOTSUPP; - } tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); if (!tmp) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6562f78b07f4370b5c1ea2c5e3a4221d7ebaeba8..3c37e216bbf324d527a27bee53418f2ecd62e724 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; const struct mlx4_en_frag_info *frag_info; struct page *page; - dma_addr_t dma; int i; for (i = 0; i < priv->num_frags; i++) { @@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, for (i = 0; i < priv->num_frags; i++) { frags[i] = ring_alloc[i]; - dma = ring_alloc[i].dma + ring_alloc[i].page_offset; + frags[i].page_offset += priv->frag_info[i].rx_headroom; + rx_desc->data[i].addr = cpu_to_be64(frags[i].dma + + frags[i].page_offset); ring_alloc[i] = page_alloc[i]; - rx_desc->data[i].addr = cpu_to_be64(dma); } return 0; @@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, if (ring->page_cache.index > 0) { frags[0] = ring->page_cache.buf[--ring->page_cache.index]; - rx_desc->data[0].addr = cpu_to_be64(frags[0].dma); + rx_desc->data[0].addr = cpu_to_be64(frags[0].dma + + frags[0].page_offset); return 0; } @@ -889,6 +890,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (xdp_prog) { struct xdp_buff xdp; dma_addr_t dma; + void *orig_data; u32 act; dma = be64_to_cpu(rx_desc->data[0].addr); @@ -896,11 +898,19 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp.data = page_address(frags[0].page) + - frags[0].page_offset; + xdp.data_hard_start = page_address(frags[0].page); + xdp.data = xdp.data_hard_start + frags[0].page_offset; xdp.data_end = xdp.data + length; + orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); + + if (xdp.data != orig_data) { + length = xdp.data_end - xdp.data; + frags[0].page_offset = xdp.data - + xdp.data_hard_start; + } + switch (act) { case XDP_PASS: break; @@ -1164,37 +1174,41 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { - enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE; struct mlx4_en_priv *priv = netdev_priv(dev); int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); - int order = MLX4_EN_ALLOC_PREFER_ORDER; - u32 align = SMP_CACHE_BYTES; - int buf_size = 0; int i = 0; /* bpf requires buffers to be set up as 1 packet per page. * This only works when num_frags == 1. */ if (priv->tx_ring_num[TX_XDP]) { - dma_dir = PCI_DMA_BIDIRECTIONAL; - /* This will gain efficient xdp frame recycling at the expense - * of more costly truesize accounting + priv->frag_info[0].order = 0; + priv->frag_info[0].frag_size = eff_mtu; + priv->frag_info[0].frag_prefix_size = 0; + /* This will gain efficient xdp frame recycling at the + * expense of more costly truesize accounting */ - align = PAGE_SIZE; - order = 0; - } - - while (buf_size < eff_mtu) { - priv->frag_info[i].order = order; - priv->frag_info[i].frag_size = - (eff_mtu > buf_size + frag_sizes[i]) ? - frag_sizes[i] : eff_mtu - buf_size; - priv->frag_info[i].frag_prefix_size = buf_size; - priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, align); - priv->frag_info[i].dma_dir = dma_dir; - buf_size += priv->frag_info[i].frag_size; - i++; + priv->frag_info[0].frag_stride = PAGE_SIZE; + priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL; + priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM; + i = 1; + } else { + int buf_size = 0; + + while (buf_size < eff_mtu) { + priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER; + priv->frag_info[i].frag_size = + (eff_mtu > buf_size + frag_sizes[i]) ? + frag_sizes[i] : eff_mtu - buf_size; + priv->frag_info[i].frag_prefix_size = buf_size; + priv->frag_info[i].frag_stride = + ALIGN(priv->frag_info[i].frag_size, + SMP_CACHE_BYTES); + priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE; + priv->frag_info[i].rx_headroom = 0; + buf_size += priv->frag_info[i].frag_size; + i++; + } } priv->num_frags = i; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4b597dca5c52d114344d638895275ed0d378bd96..5886ad78058f2dc02afcf7ae8851572138fc372a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc frame = { .page = tx_info->page, .dma = tx_info->map0_dma, - .page_offset = 0, + .page_offset = XDP_PACKET_HEADROOM, .page_size = PAGE_SIZE, }; @@ -1132,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_info->page = frame->page; frame->page = NULL; tx_info->map0_dma = dma; - tx_info->map0_byte_count = length; + tx_info->map0_byte_count = PAGE_SIZE; tx_info->nr_txbb = nr_txbb; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); tx_info->data_offset = (void *)data - (void *)tx_desc; @@ -1141,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_info->linear = 1; tx_info->inl = 0; - dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, + length, PCI_DMA_TODEVICE); - data->addr = cpu_to_be64(dma); + data->addr = cpu_to_be64(dma + frame->page_offset); data->lkey = ring->mr_key; dma_wmb(); data->byte_count = cpu_to_be32(length); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 20a936428f4a44c8ca0a7161855da310f9166b50..ba1c6cd0cc79590075f4420a930b613c9fdedc62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -475,7 +475,8 @@ struct mlx4_en_frag_info { u16 frag_prefix_size; u32 frag_stride; enum dma_data_direction dma_dir; - int order; + u16 order; + u16 rx_headroom; }; #ifdef CONFIG_MLX4_EN_DCB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 07020276fe73f0fef2ad41882b9df0c713d7b7fc..cbfa38fc72c0875d6754595e1335c5d1a5af26ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3183,6 +3183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) bool reset, was_opened; int i; + if (prog && prog->xdp_adjust_head) { + netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n"); + return -EOPNOTSUPP; + } + mutex_lock(&priv->state_lock); if ((netdev->features & NETIF_F_LRO) && prog) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 00d9a03be31df518b986eb5a14c1fb6377c1e1f8..e8d448109e03d518c1b67e3046c640cfd2c71795 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2946,6 +2946,10 @@ static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog) }; int err; + if (prog && prog->xdp_adjust_head) { + nn_err(nn, "Does not support bpf_xdp_adjust_head()\n"); + return -EOPNOTSUPP; + } if (!prog && !nn->xdp_prog) return 0; if (prog && nn->xdp_prog) { diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index cf1dd1436d939b337100b36a5ce8c04a75f919d2..aecdd1c5c0ea24a368085c0b2a41f5891ce55ac1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2507,6 +2507,11 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog) { struct qede_reload_args args; + if (prog && prog->xdp_adjust_head) { + DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n"); + return -EOPNOTSUPP; + } + /* If we're called, there was already a bpf reference increment */ args.func = &qede_xdp_reload_func; args.u.new_prog = prog; diff --git a/include/linux/filter.h b/include/linux/filter.h index f078d2b1cff6a7c80f97cfb5fad7da966a64af86..6a1658308612622b3d5e3eff2bfcd8cf5c1ff386 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -406,7 +406,8 @@ struct bpf_prog { u16 jited:1, /* Is our filter JIT'ed? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + xdp_adjust_head:1; /* Adjusting pkt head? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ @@ -440,6 +441,7 @@ struct bpf_skb_data_end { struct xdp_buff { void *data; void *data_end; + void *data_hard_start; }; /* compute the linear packet data range [data, data_end) which @@ -595,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); -bool bpf_helper_changes_skb_data(void *func); +bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6123d9b8e828b2ff0d825bd8f4ffe374daac9b4f..0eb0e87dbe9f511672102f2123129328288a9159 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -424,6 +424,12 @@ union bpf_attr { * @len: length of header to be pushed in front * @flags: Flags (unused for now) * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -469,7 +475,8 @@ union bpf_attr { FN(csum_update), \ FN(set_hash_invalid), \ FN(get_numa_node_id), \ - FN(skb_change_head), + FN(skb_change_head), \ + FN(xdp_adjust_head), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -576,6 +583,8 @@ struct bpf_sock { __u32 protocol; }; +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bdcc9f4ba7678a077c554c63963935a562796d5f..83e0d153b0b46d64fe4916ea50ccced539c6ea9a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1143,7 +1143,7 @@ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) return prog; } -bool __weak bpf_helper_changes_skb_data(void *func) +bool __weak bpf_helper_changes_pkt_data(void *func) { return false; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 88f609f1c0c3ad71b40ffc1286800a9bfeb8e7c3..4819ec9d95f6bdb82f4ae475d85b48c55f6e400c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -579,6 +579,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_xdp_adjust_head) + prog->xdp_adjust_head = 1; if (insn->imm == BPF_FUNC_tail_call) { /* mark bpf_tail_call as different opcode * to avoid conditional branch in diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5b14f85f45c6258a70d52d31d57861c9e2aa3d87..d28f9a3380a91d4b6f600a2027b99c868a1bb768 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1216,7 +1216,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) return -EINVAL; } - changes_data = bpf_helper_changes_skb_data(fn->func); + changes_data = bpf_helper_changes_pkt_data(fn->func); memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; diff --git a/net/core/filter.c b/net/core/filter.c index b751202e12f800e80c639d2364eed9aeec02f27d..b1461708a9774b03376d78a8761677feb031bfe6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2234,7 +2234,28 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; -bool bpf_helper_changes_skb_data(void *func) +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) +{ + void *data = xdp->data + offset; + + if (unlikely(data < xdp->data_hard_start || + data > xdp->data_end - ETH_HLEN)) + return -EINVAL; + + xdp->data = data; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { + .func = bpf_xdp_adjust_head, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || func == bpf_skb_vlan_pop || @@ -2244,7 +2265,8 @@ bool bpf_helper_changes_skb_data(void *func) func == bpf_skb_change_tail || func == bpf_skb_pull_data || func == bpf_l3_csum_replace || - func == bpf_l4_csum_replace) + func == bpf_l4_csum_replace || + func == bpf_xdp_adjust_head) return true; return false; @@ -2670,6 +2692,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_xdp_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_xdp_adjust_head: + return &bpf_xdp_adjust_head_proto; default: return sk_filter_func_proto(func_id); } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 00cd3081c0382ec9cdad3f1b946dcabdad400bd1..f2219c1489e5aa0f77b40929796dca8ff2794fe5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -33,6 +33,7 @@ hostprogs-y += trace_event hostprogs-y += sampleip hostprogs-y += tc_l2_redirect hostprogs-y += lwt_len_hist +hostprogs-y += xdp_tx_iptunnel test_lru_dist-objs := test_lru_dist.o libbpf.o sock_example-objs := sock_example.o libbpf.o @@ -67,6 +68,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o sampleip-objs := bpf_load.o libbpf.o sampleip_user.o tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o +xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -99,6 +101,7 @@ always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o always += lwt_len_hist_kern.o +always += xdp_tx_iptunnel_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ @@ -129,6 +132,7 @@ HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf +HOSTLOADLIBES_xdp_tx_iptunnel += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 8370a6e3839dabaa0c38b056c62c36c1faa2e983..faaffe2e139a989de6f90835121a725091d4a289 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = (void *) BPF_FUNC_skb_set_tunnel_opt; static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; +static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_head; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 49b45ccbe1530a7839ceb008c5e6008655ba734b..e30b6de94f2ee393afc1f9feb23d0a461a46a7da 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -12,6 +12,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -450,3 +454,93 @@ struct ksym *ksym_search(long key) /* out of range. return _stext */ return &syms[0]; } + +int set_link_xdp_fd(int ifindex, int fd) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + + nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + printf("send to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + printf("recv from netlink: %s\n", strerror(errno)); + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != getpid()) { + printf("Wrong pid %d, expected %d\n", + nh->nlmsg_pid, getpid()); + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + printf("Wrong seq %d, expected %d\n", + nh->nlmsg_seq, seq); + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + printf("nlmsg error %s\n", strerror(-err->error)); + goto cleanup; + case NLMSG_DONE: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 4adeeef53ad6ea9f4c269cf8dd0aa52ff9ccf110..fb46a421ab411fefe2c23ed36af4cbfee27a3807 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -31,4 +31,5 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); +int set_link_xdp_fd(int ifindex, int fd); #endif diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 2b2150d6d6f78496a7bf7cc0026b6b9258f5d39c..5f040a0d771291f7493226e9a4cdc312732fad1d 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -5,111 +5,18 @@ * License as published by the Free Software Foundation. */ #include -#include -#include #include #include #include #include #include #include -#include #include #include "bpf_load.h" #include "bpf_util.h" #include "libbpf.h" -static int set_link_xdp_fd(int ifindex, int fd) -{ - struct sockaddr_nl sa; - int sock, seq = 0, len, ret = -1; - char buf[4096]; - struct nlattr *nla, *nla_xdp; - struct { - struct nlmsghdr nh; - struct ifinfomsg ifinfo; - char attrbuf[64]; - } req; - struct nlmsghdr *nh; - struct nlmsgerr *err; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) { - printf("open netlink socket: %s\n", strerror(errno)); - return -1; - } - - if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - printf("bind to netlink: %s\n", strerror(errno)); - goto cleanup; - } - - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; - req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; - - nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); - nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - printf("send to netlink: %s\n", strerror(errno)); - goto cleanup; - } - - len = recv(sock, buf, sizeof(buf), 0); - if (len < 0) { - printf("recv from netlink: %s\n", strerror(errno)); - goto cleanup; - } - - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); - nh = NLMSG_NEXT(nh, len)) { - if (nh->nlmsg_pid != getpid()) { - printf("Wrong pid %d, expected %d\n", - nh->nlmsg_pid, getpid()); - goto cleanup; - } - if (nh->nlmsg_seq != seq) { - printf("Wrong seq %d, expected %d\n", - nh->nlmsg_seq, seq); - goto cleanup; - } - switch (nh->nlmsg_type) { - case NLMSG_ERROR: - err = (struct nlmsgerr *)NLMSG_DATA(nh); - if (!err->error) - continue; - printf("nlmsg error %s\n", strerror(-err->error)); - goto cleanup; - case NLMSG_DONE: - break; - } - } - - ret = 0; - -cleanup: - close(sock); - return ret; -} - static int ifindex; static void int_exit(int sig) diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h new file mode 100644 index 0000000000000000000000000000000000000000..dd12cc35110f0eecdb6df8c627e3530a674cfe42 --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_common.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H +#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H + +#include + +#define MAX_IPTNL_ENTRIES 256U + +struct vip { + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 dport; + __u16 family; + __u8 protocol; +}; + +struct iptnl_info { + union { + __u32 v6[4]; + __u32 v4; + } saddr; + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 family; + __u8 dmac[6]; +}; + +#endif diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c new file mode 100644 index 0000000000000000000000000000000000000000..85c38ecd3a2dcf5f65f99011a73be87d78c287ff --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_kern.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program shows how to use bpf_xdp_adjust_head() by + * encapsulating the incoming packet in an IPv4/v6 header + * and then XDP_TX it out. + */ +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "xdp_tx_iptunnel_common.h" + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 256, +}; + +struct bpf_map_def SEC("maps") vip2tnl = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct iptnl_info), + .max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline void count_tx(u32 protocol) +{ + u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, void *data_end, + u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + if (th + 1 > data_end) + return -1; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + if (uh + 1 > data_end) + return -1; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct iphdr *iph = data + sizeof(struct ethhdr); + u16 *next_iph_u16; + u16 payload_len; + struct vip vip = {}; + int dport; + u32 csum = 0; + int i; + + if (iph + 1 > data_end) + return XDP_DROP; + + dport = get_dport(iph + 1, data_end, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + /* The vip key is found. Add an IP header and send it out */ + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + iph = data + sizeof(*new_eth); + old_eth = data + sizeof(*iph); + + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || + iph + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP)); + + iph->version = 4; + iph->ihl = sizeof(*iph) >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = htons(payload_len + sizeof(*iph)); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph_u16 = (u16 *)iph; +#pragma clang loop unroll(full) + for (i = 0; i < sizeof(*iph) >> 1; i++) + csum += *next_iph_u16++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + __u16 payload_len; + struct vip vip = {}; + int dport; + + if (ip6h + 1 > data_end) + return XDP_DROP; + + dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + /* The vip key is found. Add an IP header and send it out */ + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + ip6h = data + sizeof(*new_eth); + old_eth = data + sizeof(*ip6h); + + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || + ip6h + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6)); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h)); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_IP)) + return handle_ipv4(xdp); + else if (h_proto == htons(ETH_P_IPV6)) + + return handle_ipv6(xdp); + else + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c new file mode 100644 index 0000000000000000000000000000000000000000..7a71f5c746848fe29df7076c3aa7acb48101c544 --- /dev/null +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -0,0 +1,256 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "libbpf.h" +#include "bpf_util.h" +#include "xdp_tx_iptunnel_common.h" + +#define STATS_INTERVAL_S 2U + +static int ifindex = -1; + +static void int_exit(int sig) +{ + if (ifindex > -1) + set_link_xdp_fd(ifindex, -1); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(unsigned int kill_after_s) +{ + const unsigned int nr_protos = 256; + unsigned int nr_cpus = bpf_num_possible_cpus(); + time_t started_at = time(NULL); + __u64 values[nr_cpus], prev[nr_protos][nr_cpus]; + __u32 proto; + int i; + + memset(prev, 0, sizeof(prev)); + + while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { + sleep(STATS_INTERVAL_S); + + for (proto = 0; proto < nr_protos; proto++) { + __u64 sum = 0; + + assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[proto][i]); + + if (sum) + printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n", + proto, sum, sum / STATS_INTERVAL_S); + memcpy(prev[proto], values, sizeof(values)); + } + } +} + +static void usage(const char *cmd) +{ + printf("Start a XDP prog which encapsulates incoming packets\n" + "in an IPv4/v6 header and XDP_TX it out. The dst \n" + "is used to select packets to encapsulate\n\n"); + printf("Usage: %s [...]\n", cmd); + printf(" -i Interface Index\n"); + printf(" -a IPv4 or IPv6\n"); + printf(" -p A port range (e.g. 433-444) is also allowed\n"); + printf(" -s Used in the IPTunnel header\n"); + printf(" -d Used in the IPTunnel header\n"); + printf(" -m Used in sending the IP Tunneled pkt\n"); + printf(" -T Default: 0 (forever)\n"); + printf(" -P Default is TCP\n"); + printf(" -h Display this help\n"); +} + +static int parse_ipstr(const char *ipstr, unsigned int *addr) +{ + if (inet_pton(AF_INET6, ipstr, addr) == 1) { + return AF_INET6; + } else if (inet_pton(AF_INET, ipstr, addr) == 1) { + addr[1] = addr[2] = addr[3] = 0; + return AF_INET; + } + + fprintf(stderr, "%s is an invalid IP\n", ipstr); + return AF_UNSPEC; +} + +static int parse_ports(const char *port_str, int *min_port, int *max_port) +{ + char *end; + long tmp_min_port; + long tmp_max_port; + + tmp_min_port = strtol(optarg, &end, 10); + if (tmp_min_port < 1 || tmp_min_port > 65535) { + fprintf(stderr, "Invalid port(s):%s\n", optarg); + return 1; + } + + if (*end == '-') { + end++; + tmp_max_port = strtol(end, NULL, 10); + if (tmp_max_port < 1 || tmp_max_port > 65535) { + fprintf(stderr, "Invalid port(s):%s\n", optarg); + return 1; + } + } else { + tmp_max_port = tmp_min_port; + } + + if (tmp_min_port > tmp_max_port) { + fprintf(stderr, "Invalid port(s):%s\n", optarg); + return 1; + } + + if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) { + fprintf(stderr, "Port range (%s) is larger than %u\n", + port_str, MAX_IPTNL_ENTRIES); + return 1; + } + *min_port = tmp_min_port; + *max_port = tmp_max_port; + + return 0; +} + +int main(int argc, char **argv) +{ + unsigned char opt_flags[256] = {}; + unsigned int kill_after_s = 0; + const char *optstr = "i:a:p:s:d:m:T:P:h"; + int min_port = 0, max_port = 0; + struct iptnl_info tnl = {}; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct vip vip = {}; + char filename[256]; + int opt; + int i; + + tnl.family = AF_UNSPEC; + vip.protocol = IPPROTO_TCP; + + for (i = 0; i < strlen(optstr); i++) + if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') + opt_flags[(unsigned char)optstr[i]] = 1; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + unsigned short family; + unsigned int *v6; + + switch (opt) { + case 'i': + ifindex = atoi(optarg); + break; + case 'a': + vip.family = parse_ipstr(optarg, vip.daddr.v6); + if (vip.family == AF_UNSPEC) + return 1; + break; + case 'p': + if (parse_ports(optarg, &min_port, &max_port)) + return 1; + break; + case 'P': + vip.protocol = atoi(optarg); + break; + case 's': + case 'd': + if (opt == 's') + v6 = tnl.saddr.v6; + else + v6 = tnl.daddr.v6; + + family = parse_ipstr(optarg, v6); + if (family == AF_UNSPEC) + return 1; + if (tnl.family == AF_UNSPEC) { + tnl.family = family; + } else if (tnl.family != family) { + fprintf(stderr, + "The IP version of the src and dst addresses used in the IP encapsulation does not match\n"); + return 1; + } + break; + case 'm': + if (!ether_aton_r(optarg, + (struct ether_addr *)tnl.dmac)) { + fprintf(stderr, "Invalid mac address:%s\n", + optarg); + return 1; + } + break; + case 'T': + kill_after_s = atoi(optarg); + break; + default: + usage(argv[0]); + return 1; + } + opt_flags[opt] = 0; + } + + for (i = 0; i < strlen(optstr); i++) { + if (opt_flags[(unsigned int)optstr[i]]) { + fprintf(stderr, "Missing argument -%c\n", optstr[i]); + usage(argv[0]); + return 1; + } + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + + while (min_port <= max_port) { + vip.dport = htons(min_port++); + if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + perror("bpf_update_elem(&vip2tnl)"); + return 1; + } + } + + if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(kill_after_s); + + set_link_xdp_fd(ifindex, -1); + + return 0; +}