diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 59d3cc81895b46fa8a3d6c965c93a6b751d6f1c5..9bf010e88da71f12534b0c63369a2ce7c5bbda79 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -302,6 +302,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, return dev_queue_xmit(skb); } +static bool bond_xdp_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_ACTIVEBACKUP: + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + return true; + default: + return false; + } +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -2118,6 +2131,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_update_slave_arr(bond, NULL); + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { + NL_SET_ERR_MSG(extack, "Slave does not support XDP"); + slave_err(bond_dev, slave_dev, "Slave does not support XDP\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + } else if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = bond->xdp_prog, + .extack = extack, + }; + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(bond_dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + + res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (res < 0) { + /* ndo_bpf() sets extack error message */ + slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); + goto err_sysfs_del; + } + if (bond->xdp_prog) + bpf_prog_inc(bond->xdp_prog); + } + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); @@ -2235,6 +2283,17 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); + if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = NULL, + .extack = NULL, + }; + if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); + } + /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -3610,66 +3669,84 @@ static struct notifier_block bond_netdev_notifier = { }; /*---------------------------- Hashing Policies -----------------------------*/ +/* Helper to access data in a packet, with or without a backing skb. + * If skb is given the data is linearized if necessary via pskb_may_pull. + */ +static inline void *bond_pull_data(struct sk_buff *skb, + void *data, int hlen, int n) +{ + if (likely(n <= hlen)) + return data; + else if (skb && likely(pskb_may_pull(skb, n))) + return skb->head; + + return NULL; +} /* L2 hash helper */ -static inline u32 bond_eth_hash(struct sk_buff *skb) +static inline u32 bond_eth_hash(struct sk_buff *skb, void *data, int mhoff, int hlen) { - struct ethhdr *ep, hdr_tmp; + struct ethhdr *ep; - ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp); - if (ep) - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; - return 0; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + + ep = (struct ethhdr *)(data + mhoff); + return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto); } -static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, - int *noff, int *proto, bool l34) +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, void *data, + int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34) { const struct ipv6hdr *iph6; const struct iphdr *iph; - if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) + if (l2_proto == htons(ETH_P_IP)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph)); + if (!data) return false; - iph = (const struct iphdr *)(skb->data + *noff); + + iph = (const struct iphdr *)(data + *nhoff); iph_to_flow_copy_v4addrs(fk, iph); - *noff += iph->ihl << 2; + *nhoff += iph->ihl << 2; if (!ip_is_fragment(iph)) - *proto = iph->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) + *ip_proto = iph->protocol; + } else if (l2_proto == htons(ETH_P_IPV6)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6)); + if (!data) return false; - iph6 = (const struct ipv6hdr *)(skb->data + *noff); + + iph6 = (const struct ipv6hdr *)(data + *nhoff); iph_to_flow_copy_v6addrs(fk, iph6); - *noff += sizeof(*iph6); - *proto = iph6->nexthdr; + *nhoff += sizeof(*iph6); + *ip_proto = iph6->nexthdr; } else { return false; } - if (l34 && *proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + if (l34 && *ip_proto >= 0) + fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen); return true; } /* Extract the appropriate headers based on bond's xmit policy */ -static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, - struct flow_keys *fk) +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, void *data, + __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk) { bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; - int noff, proto = -1; + int ip_proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) { memset(fk, 0, sizeof(*fk)); return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, - fk, NULL, 0, 0, 0, 0); + fk, data, l2_proto, nhoff, hlen, 0); } fk->ports.ports = 0; memset(&fk->icmp, 0, sizeof(fk->icmp)); - noff = skb_network_offset(skb); - if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) + if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34)) return false; /* ICMP error packets contains at least 8 bytes of the header @@ -3677,51 +3754,41 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, * to correlate ICMP error packets within the same flow which * generated the error. */ - if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { - skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, - skb_transport_offset(skb), - skb_headlen(skb)); - if (proto == IPPROTO_ICMP) { + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); + if (ip_proto == IPPROTO_ICMP) { if (!icmp_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmphdr); - } else if (proto == IPPROTO_ICMPV6) { + nhoff += sizeof(struct icmphdr); + } else if (ip_proto == IPPROTO_ICMPV6) { if (!icmpv6_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmp6hdr); + nhoff += sizeof(struct icmp6hdr); } - return bond_flow_ip(skb, fk, &noff, &proto, l34); + return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); } return true; } - -/** - * bond_xmit_hash - generate a hash value based on the xmit policy - * @bond: bonding device - * @skb: buffer to use for headers - * - * This function will extract the necessary headers from the skb buffer and use - * them to generate a hash based on the xmit_policy set in the bonding device +/* Generate hash based on xmit policy. If @skb is given it is used to linearize + * the data as required, but this function can be used without it if the data is + * known to be linear (e.g. with xdp_buff). */ -u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, void *data, + __be16 l2_proto, int mhoff, int nhoff, int hlen) { struct flow_keys flow; u32 hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && - skb->l4_hash) - return skb->hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || - !bond_flow_dissect(bond, skb, &flow)) - return bond_eth_hash(skb); + !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) + return bond_eth_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { - hash = bond_eth_hash(skb); + hash = bond_eth_hash(skb, data, mhoff, hlen); } else { if (flow.icmp.id) memcpy(&hash, &flow.icmp, sizeof(hash)); @@ -3736,6 +3803,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) return hash >> 1; } +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + */ +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +{ + if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && + skb->l4_hash) + return skb->hash; + + return __bond_xmit_hash(bond, skb, skb->data, skb->protocol, + skb_mac_offset(skb), skb_network_offset(skb), + skb_headlen(skb)); +} + +/** + * bond_xmit_hash_xdp - generate a hash value based on the xmit policy + * @bond: bonding device + * @xdp: buffer to use for headers + * + * The XDP variant of bond_xmit_hash. + */ +static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) +{ + struct ethhdr *eth; + + if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) + return 0; + + eth = (struct ethhdr *)xdp->data; + + return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, + sizeof(struct ethhdr), xdp->data_end - xdp->data); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -3765,6 +3871,12 @@ static int bond_open(struct net_device *bond_dev) struct list_head *iter; struct slave *slave; + if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) { + bond->rr_tx_counter = alloc_percpu(u32); + if (!bond->rr_tx_counter) + return -ENOMEM; + } + /* reset slave->backup and slave->inactive */ if (bond_has_slaves(bond)) { bond_for_each_slave(bond, slave, iter) { @@ -4317,16 +4429,16 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) slave_id = prandom_u32(); break; case 1: - slave_id = bond->rr_tx_counter; + slave_id = this_cpu_inc_return(*bond->rr_tx_counter); break; default: reciprocal_packets_per_slave = bond->params.reciprocal_packets_per_slave; - slave_id = reciprocal_divide(bond->rr_tx_counter, + slave_id = this_cpu_inc_return(*bond->rr_tx_counter); + slave_id = reciprocal_divide(slave_id, reciprocal_packets_per_slave); break; } - bond->rr_tx_counter++; return slave_id; } @@ -4369,6 +4481,47 @@ static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, return NULL; } +static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct slave *slave; + int slave_cnt; + u32 slave_id; + const struct ethhdr *eth; + void *data = xdp->data; + + if (data + sizeof(struct ethhdr) > xdp->data_end) + goto non_igmp; + + eth = (struct ethhdr *)data; + data += sizeof(struct ethhdr); + + /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ + if (eth->h_proto == htons(ETH_P_IP)) { + const struct iphdr *iph; + + if (data + sizeof(struct iphdr) > xdp->data_end) + goto non_igmp; + + iph = (struct iphdr *)data; + + if (iph->protocol == IPPROTO_IGMP) { + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + return slave; + return bond_get_slave_by_id(bond, 0); + } + } + +non_igmp: + slave_cnt = READ_ONCE(bond->slave_cnt); + if (likely(slave_cnt)) { + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); + } + return NULL; +} + static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { @@ -4382,8 +4535,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return bond_tx_drop(bond_dev, skb); } -static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, - struct sk_buff *skb) +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) { return rcu_dereference(bond->curr_active_slave); } @@ -4397,7 +4549,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); if (slave) return bond_dev_queue_xmit(bond, skb, slave->dev); @@ -4618,6 +4770,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, return slave; } +static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct bond_up_slave *slaves; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash_xdp(bond, xdp); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + return slaves->arr[hash % count]; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4746,7 +4914,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, slave = bond_xmit_roundrobin_slave_get(bond, skb); break; case BOND_MODE_ACTIVEBACKUP: - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); break; case BOND_MODE_8023AD: case BOND_MODE_XOR: @@ -4826,6 +4994,173 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static struct net_device * +bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + /* Caller needs to hold rcu_read_lock() */ + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); + break; + + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond); + break; + + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); + break; + + default: + /* Should never happen. Mode guarded by bond_xdp_check() */ + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); + WARN_ON_ONCE(1); + return NULL; + } + + if (slave) + return slave->dev; + + return NULL; +} + +static int bond_xdp_xmit(struct net_device *bond_dev, + int n, struct xdp_frame **frames, u32 flags) +{ + int nxmit, err = -ENXIO; + + rcu_read_lock(); + + for (nxmit = 0; nxmit < n; nxmit++) { + struct xdp_frame *frame = frames[nxmit]; + struct xdp_frame *frames1[] = {frame}; + struct net_device *slave_dev; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(frame, &xdp); + + slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); + if (!slave_dev) { + err = -ENXIO; + break; + } + + err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); + if (err < 1) + break; + } + + rcu_read_unlock(); + + /* If error happened on the first frame then we can pass the error up, otherwise + * report the number of frames that were xmitted. + */ + if (err < 0) + return (nxmit == 0 ? err : nxmit); + + return nxmit; +} + +static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bonding *bond = netdev_priv(dev); + struct list_head *iter; + struct slave *slave, *rollback_slave; + struct bpf_prog *old_prog; + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = prog, + .extack = extack, + }; + int err; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) + return -EOPNOTSUPP; + + old_prog = bond->xdp_prog; + bond->xdp_prog = prog; + + bond_for_each_slave(bond, slave, iter) { + struct net_device *slave_dev = slave->dev; + + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + NL_SET_ERR_MSG(extack, "Slave device does not support XDP"); + slave_err(dev, slave_dev, "Slave does not support XDP\n"); + err = -EOPNOTSUPP; + goto err; + } + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + err = -EOPNOTSUPP; + goto err; + } + + err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err < 0) { + /* ndo_bpf() sets extack error message */ + slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); + goto err; + } + if (prog) + bpf_prog_inc(prog); + } + + if (prog) { + static_branch_inc(&bpf_master_redirect_enabled_key); + } else if (old_prog) { + bpf_prog_put(old_prog); + static_branch_dec(&bpf_master_redirect_enabled_key); + } + + return 0; + +err: + /* unwind the program changes */ + bond->xdp_prog = old_prog; + xdp.prog = old_prog; + xdp.extack = NULL; /* do not overwrite original error */ + + bond_for_each_slave(bond, rollback_slave, iter) { + struct net_device *slave_dev = rollback_slave->dev; + int err_unwind; + + if (slave == rollback_slave) + break; + + err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err_unwind < 0) + slave_err(dev, slave_dev, + "Error %d when unwinding XDP program change\n", err_unwind); + else if (xdp.prog) + bpf_prog_inc(xdp.prog); + } + return err; +} + +static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return bond_xdp_set(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) { if (speed == 0 || speed == SPEED_UNKNOWN) @@ -4911,6 +5246,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, + .ndo_bpf = bond_xdp, + .ndo_xdp_xmit = bond_xdp_xmit, + .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, }; static const struct device_type bond_type = { @@ -4922,6 +5260,9 @@ static void bond_destructor(struct net_device *bond_dev) struct bonding *bond = netdev_priv(bond_dev); if (bond->wq) destroy_workqueue(bond->wq); + + if (bond->rr_tx_counter) + free_percpu(bond->rr_tx_counter); } void bond_setup(struct net_device *bond_dev) diff --git a/include/linux/filter.h b/include/linux/filter.h index 0418d88aa0f3d09a76c10335be84ff65c5399a58..a2c9ca9626e961083e5715aaea75a078b56e77c1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -747,6 +747,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, DECLARE_BPF_DISPATCHER(xdp) +DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp); + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -756,7 +760,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47ebb927d4c22b503e404c6a47391fb9d1513257..ecbf3eb88e428acb4c5524fbe252a7454c9191b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1301,6 +1301,9 @@ struct netdev_net_notifier { * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + * struct xdp_buff *xdp); + * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific @@ -1526,7 +1529,8 @@ struct net_device_ops { struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); - KABI_RESERVE(1) + KABI_USE(1, struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + struct xdp_buff *xdp)) KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) @@ -3994,6 +3998,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int xdp_umem_query(struct net_device *dev, u16 queue_id); diff --git a/include/net/bonding.h b/include/net/bonding.h index 467c9dac43cab76d02e5ee2dbcfd2be24bf9f2ae..7b07700477fc2ca5d6e944ff4adb0aacf261feb5 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -237,7 +237,7 @@ struct bonding { char proc_file_name[IFNAMSIZ]; #endif /* CONFIG_PROC_FS */ struct list_head bond_list; - u32 rr_tx_counter; + u32 __percpu *rr_tx_counter; struct ad_bond_info ad_info; struct alb_bond_info alb_info; struct bond_params params; @@ -258,6 +258,7 @@ struct bonding { /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ + struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ diff --git a/net/core/dev.c b/net/core/dev.c index 02bf39500817a09a5976bd11de659ef3f6e40bd3..5a1994be7d843d95c1539728f3baa078bffa5738 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9037,7 +9037,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } -static u8 dev_xdp_prog_count(struct net_device *dev) +u8 dev_xdp_prog_count(struct net_device *dev) { u8 count = 0; int i; @@ -9047,6 +9047,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev) count++; return count; } +EXPORT_SYMBOL_GPL(dev_xdp_prog_count); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9140,6 +9141,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack { unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; + struct net_device *upper; + struct list_head *iter; enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; @@ -9178,6 +9181,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EBUSY; } + /* don't allow if an upper device already has a program */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (dev_xdp_prog_count(upper) > 0) { + NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program"); + return -EEXIST; + } + } + cur_prog = dev_xdp_prog(dev, mode); /* can't replace attached prog with link */ if (link && cur_prog) { diff --git a/net/core/filter.c b/net/core/filter.c index 2d3d5f16819657c931b41066502817768b7736d6..0305ca056b8c89f279b0352ff4c5c83260045ada 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3990,6 +3990,31 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } +DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); +EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp) +{ + struct net_device *master, *slave; + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); + if (slave && slave != xdp->rxq->dev) { + /* The target device is different from the receiving device, so + * redirect it to the new device. + * Using XDP_REDIRECT gets the correct behaviour from XDP enabled + * drivers to unmap the packet from their rx ring. + */ + ri->tgt_index = slave->ifindex; + //ri->map_id = INT_MAX; TODO: how to usage? + //ri->map_type = BPF_MAP_TYPE_UNSPEC; + return XDP_REDIRECT; + } + return XDP_TX; +} +EXPORT_SYMBOL_GPL(xdp_master_redirect); + int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) {