Unverified commit 516124de, authored by openeuler-ci-bot, committed by Gitee

!1520 enable bonding XDP

Merge Pull Request from: @ci-robot 
 
PR sync from: Zhengchao Shao <shaozhengchao@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/2ZEFKDERW5AM7E6QSKS2G7GLFUFEZXLL/ 
The test uses a 10G Intel 82599 network adapter on an Intel(R) Xeon(R)
Gold 6138 CPU @ 2.00GHz platform. The peer sends 64-byte packets using
DPDK pktgen. The measured performance is as follows:

 without patch (1 dev):
   XDP_DROP:              1.00%      14.69Mpps
   XDP_TX:                2.20%      10.70Mpps    10.70Mpps
   XDP_DROP (RSS):        1.90%      10.13Mpps
   XDP_TX (RSS):          6.20%      10.09Mpps    10.09Mpps
 -----------------------
 with patch, bond (1 dev):
   XDP_DROP:              1.00%      14.69Mpps
   XDP_TX:                2.10%       6.84Mpps     6.84Mpps
   XDP_DROP (RSS):        2.00%      10.20Mpps
   XDP_TX (RSS):          8.40%      10.17Mpps    10.17Mpps
 -----------------------
 with patch, bond (2 devs):
   XDP_DROP:              1.90%      29.3Mpps
   XDP_TX:                4.20%      11.31Mpps    11.31Mpps
   XDP_DROP (RSS):        3.90%      20.40Mpps
   XDP_TX (RSS):         16.80%      18.55Mpps    18.55Mpps
 --------------------------------------------------------------

Jonathan Toppins (1):
  bonding: fix NULL deref in bond_rr_gen_slave_id

Jussi Maki (5):
  net, bonding: Refactor bond_xmit_hash for use with xdp_buff
  net, core: Add support for XDP redirection to slave device
  net, bonding: Add XDP support to the bonding driver
  bonding: Fix negative jump label count on nested bonding
  net: bonding: Use per-cpu rr_tx_counter

Moshe Tal (1):
  bonding: Fix extraction of ports from the packet headers

Zhengchao Shao (1):
  net: fix kabi change in struct net_device_ops


-- 
2.34.1
 
https://gitee.com/openeuler/kernel/issues/I7NDRB 
 
Link:https://gitee.com/openeuler/kernel/pulls/1520 

Reviewed-by: Yue Haibing <yuehaibing@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
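
With this series applied, a single XDP program is attached to the bond
master and the driver propagates it to every slave. A minimal user-space
sketch of the attach step using libbpf follows; the object path
xdp_prog.o, program name xdp_bond_tx, and interface bond0 are
illustrative, and it assumes a libbpf version that provides
bpf_xdp_attach() (libbpf >= 0.8). libbpf also provides bpf_xdp_detach()
for removal.

/* Hedged sketch: attach an XDP program to a bond master via libbpf.
 * File, program, and interface names are examples, not part of the series.
 */
#include <stdio.h>
#include <net/if.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int ifindex, prog_fd;

	ifindex = if_nametoindex("bond0");	/* bond master, not a slave */
	if (!ifindex)
		return 1;

	obj = bpf_object__open_file("xdp_prog.o", NULL);
	if (libbpf_get_error(obj))
		return 1;
	if (bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_name(obj, "xdp_bond_tx");
	if (!prog)
		return 1;
	prog_fd = bpf_program__fd(prog);

	/* The bonding driver then installs the program on every slave and
	 * rejects slaves without ndo_bpf/ndo_xdp_xmit, as in bond_xdp_set().
	 */
	if (bpf_xdp_attach(ifindex, prog_fd, 0, NULL))
		return 1;

	return 0;
}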
@@ -302,6 +302,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 	return dev_queue_xmit(skb);
 }
 
+static bool bond_xdp_check(struct bonding *bond)
+{
+	switch (BOND_MODE(bond)) {
+	case BOND_MODE_ROUNDROBIN:
+	case BOND_MODE_ACTIVEBACKUP:
+	case BOND_MODE_8023AD:
+	case BOND_MODE_XOR:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*---------------------------------- VLAN -----------------------------------*/
 
 /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -2118,6 +2131,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	bond_update_slave_arr(bond, NULL);
 
+	if (!slave_dev->netdev_ops->ndo_bpf ||
+	    !slave_dev->netdev_ops->ndo_xdp_xmit) {
+		if (bond->xdp_prog) {
+			NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+			slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+			res = -EOPNOTSUPP;
+			goto err_sysfs_del;
+		}
+	} else if (bond->xdp_prog) {
+		struct netdev_bpf xdp = {
+			.command = XDP_SETUP_PROG,
+			.flags   = 0,
+			.prog    = bond->xdp_prog,
+			.extack  = extack,
+		};
+
+		if (dev_xdp_prog_count(slave_dev) > 0) {
+			NL_SET_ERR_MSG(extack,
+				       "Slave has XDP program loaded, please unload before enslaving");
+			slave_err(bond_dev, slave_dev,
+				  "Slave has XDP program loaded, please unload before enslaving\n");
+			res = -EOPNOTSUPP;
+			goto err_sysfs_del;
+		}
+
+		res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+		if (res < 0) {
+			/* ndo_bpf() sets extack error message */
+			slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+			goto err_sysfs_del;
+		}
+		if (bond->xdp_prog)
+			bpf_prog_inc(bond->xdp_prog);
+	}
+
 	slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
 		   bond_is_active_slave(new_slave) ? "an active" : "a backup",
 		   new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2235,6 +2283,17 @@ static int __bond_release_one(struct net_device *bond_dev,
 	/* recompute stats just before removing the slave */
 	bond_get_stats(bond->dev, &bond->bond_stats);
 
+	if (bond->xdp_prog) {
+		struct netdev_bpf xdp = {
+			.command = XDP_SETUP_PROG,
+			.flags   = 0,
+			.prog    = NULL,
+			.extack  = NULL,
+		};
+		if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+			slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+	}
+
 	/* unregister rx_handler early so bond_handle_frame wouldn't be called
 	 * for this slave anymore.
 	 */
@@ -3610,66 +3669,84 @@ static struct notifier_block bond_netdev_notifier = {
 };
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
+/* Helper to access data in a packet, with or without a backing skb.
+ * If skb is given the data is linearized if necessary via pskb_may_pull.
+ */
+static inline void *bond_pull_data(struct sk_buff *skb,
+				   void *data, int hlen, int n)
+{
+	if (likely(n <= hlen))
+		return data;
+	else if (skb && likely(pskb_may_pull(skb, n)))
+		return skb->head;
+
+	return NULL;
+}
+
 /* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, void *data, int mhoff, int hlen)
 {
-	struct ethhdr *ep, hdr_tmp;
+	struct ethhdr *ep;
 
-	ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
-	if (ep)
-		return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
-	return 0;
+	data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+	if (!data)
+		return 0;
+
+	ep = (struct ethhdr *)(data + mhoff);
+
+	return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
 }
 
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
-			 int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, void *data,
+			 int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
 {
 	const struct ipv6hdr *iph6;
 	const struct iphdr *iph;
 
-	if (skb->protocol == htons(ETH_P_IP)) {
-		if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+	if (l2_proto == htons(ETH_P_IP)) {
+		data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+		if (!data)
 			return false;
-		iph = (const struct iphdr *)(skb->data + *noff);
+
+		iph = (const struct iphdr *)(data + *nhoff);
 		iph_to_flow_copy_v4addrs(fk, iph);
-		*noff += iph->ihl << 2;
+		*nhoff += iph->ihl << 2;
 		if (!ip_is_fragment(iph))
-			*proto = iph->protocol;
-	} else if (skb->protocol == htons(ETH_P_IPV6)) {
-		if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+			*ip_proto = iph->protocol;
+	} else if (l2_proto == htons(ETH_P_IPV6)) {
+		data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+		if (!data)
 			return false;
-		iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+		iph6 = (const struct ipv6hdr *)(data + *nhoff);
 		iph_to_flow_copy_v6addrs(fk, iph6);
-		*noff += sizeof(*iph6);
-		*proto = iph6->nexthdr;
+		*nhoff += sizeof(*iph6);
+		*ip_proto = iph6->nexthdr;
 	} else {
 		return false;
 	}
 
-	if (l34 && *proto >= 0)
-		fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+	if (l34 && *ip_proto >= 0)
+		fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
 
 	return true;
 }
 
 /* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
-			      struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, void *data,
+			      __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
 {
 	bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
-	int noff, proto = -1;
+	int ip_proto = -1;
 
 	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) {
 		memset(fk, 0, sizeof(*fk));
 		return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
-					  fk, NULL, 0, 0, 0, 0);
+					  fk, data, l2_proto, nhoff, hlen, 0);
 	}
 
 	fk->ports.ports = 0;
 	memset(&fk->icmp, 0, sizeof(fk->icmp));
-	noff = skb_network_offset(skb);
-	if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+	if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
 		return false;
 
 	/* ICMP error packets contains at least 8 bytes of the header
@@ -3677,51 +3754,41 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
 	 * to correlate ICMP error packets within the same flow which
 	 * generated the error.
 	 */
-	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
-		skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
-				      skb_transport_offset(skb),
-				      skb_headlen(skb));
-		if (proto == IPPROTO_ICMP) {
+	if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+		skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+		if (ip_proto == IPPROTO_ICMP) {
 			if (!icmp_is_err(fk->icmp.type))
 				return true;
 
-			noff += sizeof(struct icmphdr);
-		} else if (proto == IPPROTO_ICMPV6) {
+			nhoff += sizeof(struct icmphdr);
+		} else if (ip_proto == IPPROTO_ICMPV6) {
 			if (!icmpv6_is_err(fk->icmp.type))
 				return true;
 
-			noff += sizeof(struct icmp6hdr);
+			nhoff += sizeof(struct icmp6hdr);
 		}
-		return bond_flow_ip(skb, fk, &noff, &proto, l34);
+		return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
 	}
 
 	return true;
 }
 
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
  */
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, void *data,
+			    __be16 l2_proto, int mhoff, int nhoff, int hlen)
 {
 	struct flow_keys flow;
 	u32 hash;
 
-	if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-	    skb->l4_hash)
-		return skb->hash;
-
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
-	    !bond_flow_dissect(bond, skb, &flow))
-		return bond_eth_hash(skb);
+	    !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+		return bond_eth_hash(skb, data, mhoff, hlen);
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
 	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
-		hash = bond_eth_hash(skb);
+		hash = bond_eth_hash(skb, data, mhoff, hlen);
 	} else {
 		if (flow.icmp.id)
 			memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3736,6 +3803,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 	return hash >> 1;
 }
 
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+	    skb->l4_hash)
+		return skb->hash;
+
+	return __bond_xmit_hash(bond, skb, skb->data, skb->protocol,
+				skb_mac_offset(skb), skb_network_offset(skb),
+				skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+	struct ethhdr *eth;
+
+	if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+		return 0;
+
+	eth = (struct ethhdr *)xdp->data;
+
+	return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+				sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
 /*-------------------------- Device entry points ----------------------------*/
 
 void bond_work_init_all(struct bonding *bond)
@@ -3765,6 +3871,12 @@ static int bond_open(struct net_device *bond_dev)
 	struct list_head *iter;
 	struct slave *slave;
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) {
+		bond->rr_tx_counter = alloc_percpu(u32);
+		if (!bond->rr_tx_counter)
+			return -ENOMEM;
+	}
+
 	/* reset slave->backup and slave->inactive */
 	if (bond_has_slaves(bond)) {
 		bond_for_each_slave(bond, slave, iter) {
@@ -4317,16 +4429,16 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
 		slave_id = prandom_u32();
 		break;
 	case 1:
-		slave_id = bond->rr_tx_counter;
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
 		break;
 	default:
 		reciprocal_packets_per_slave =
 			bond->params.reciprocal_packets_per_slave;
-		slave_id = reciprocal_divide(bond->rr_tx_counter,
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
+		slave_id = reciprocal_divide(slave_id,
 					     reciprocal_packets_per_slave);
 		break;
 	}
-	bond->rr_tx_counter++;
 
 	return slave_id;
 }
@@ -4369,6 +4481,47 @@ static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond,
 	return NULL;
 }
 
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+							struct xdp_buff *xdp)
+{
+	struct slave *slave;
+	int slave_cnt;
+	u32 slave_id;
+	const struct ethhdr *eth;
+	void *data = xdp->data;
+
+	if (data + sizeof(struct ethhdr) > xdp->data_end)
+		goto non_igmp;
+
+	eth = (struct ethhdr *)data;
+	data += sizeof(struct ethhdr);
+
+	/* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+	if (eth->h_proto == htons(ETH_P_IP)) {
+		const struct iphdr *iph;
+
+		if (data + sizeof(struct iphdr) > xdp->data_end)
+			goto non_igmp;
+
+		iph = (struct iphdr *)data;
+
+		if (iph->protocol == IPPROTO_IGMP) {
+			slave = rcu_dereference(bond->curr_active_slave);
+			if (slave)
+				return slave;
+
+			return bond_get_slave_by_id(bond, 0);
+		}
+	}
+
+non_igmp:
+	slave_cnt = READ_ONCE(bond->slave_cnt);
+	if (likely(slave_cnt)) {
+		slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+		return bond_get_slave_by_id(bond, slave_id);
+	}
+
+	return NULL;
+}
+
 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
 					struct net_device *bond_dev)
 {
@@ -4382,8 +4535,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
 	return bond_tx_drop(bond_dev, skb);
 }
 
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
-						      struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
 {
 	return rcu_dereference(bond->curr_active_slave);
 }
 
@@ -4397,7 +4549,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
 
-	slave = bond_xmit_activebackup_slave_get(bond, skb);
+	slave = bond_xmit_activebackup_slave_get(bond);
 	if (slave)
 		return bond_dev_queue_xmit(bond, skb, slave->dev);
@@ -4618,6 +4770,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
 	return slave;
 }
 
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+						     struct xdp_buff *xdp)
+{
+	struct bond_up_slave *slaves;
+	unsigned int count;
+	u32 hash;
+
+	hash = bond_xmit_hash_xdp(bond, xdp);
+	slaves = rcu_dereference(bond->usable_slaves);
+	count = slaves ? READ_ONCE(slaves->count) : 0;
+	if (unlikely(!count))
+		return NULL;
+
+	return slaves->arr[hash % count];
+}
+
 /* Use this Xmit function for 3AD as well as XOR modes. The current
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
@@ -4746,7 +4914,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
 		slave = bond_xmit_roundrobin_slave_get(bond, skb);
 		break;
 	case BOND_MODE_ACTIVEBACKUP:
-		slave = bond_xmit_activebackup_slave_get(bond, skb);
+		slave = bond_xmit_activebackup_slave_get(bond);
 		break;
 	case BOND_MODE_8023AD:
 	case BOND_MODE_XOR:
@@ -4826,6 +4994,173 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+
+	/* Caller needs to hold rcu_read_lock() */
+
+	switch (BOND_MODE(bond)) {
+	case BOND_MODE_ROUNDROBIN:
+		slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+		break;
+
+	case BOND_MODE_ACTIVEBACKUP:
+		slave = bond_xmit_activebackup_slave_get(bond);
+		break;
+
+	case BOND_MODE_8023AD:
+	case BOND_MODE_XOR:
+		slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+		break;
+
+	default:
+		/* Should never happen. Mode guarded by bond_xdp_check() */
+		netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+		WARN_ON_ONCE(1);
+		return NULL;
+	}
+
+	if (slave)
+		return slave->dev;
+
+	return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+			 int n, struct xdp_frame **frames, u32 flags)
+{
+	int nxmit, err = -ENXIO;
+
+	rcu_read_lock();
+
+	for (nxmit = 0; nxmit < n; nxmit++) {
+		struct xdp_frame *frame = frames[nxmit];
+		struct xdp_frame *frames1[] = {frame};
+		struct net_device *slave_dev;
+		struct xdp_buff xdp;
+
+		xdp_convert_frame_to_buff(frame, &xdp);
+
+		slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+		if (!slave_dev) {
+			err = -ENXIO;
+			break;
+		}
+
+		err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+		if (err < 1)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	/* If error happened on the first frame then we can pass the error up, otherwise
+	 * report the number of frames that were xmitted.
+	 */
+	if (err < 0)
+		return (nxmit == 0 ? err : nxmit);
+
+	return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+			struct netlink_ext_ack *extack)
+{
+	struct bonding *bond = netdev_priv(dev);
+	struct list_head *iter;
+	struct slave *slave, *rollback_slave;
+	struct bpf_prog *old_prog;
+	struct netdev_bpf xdp = {
+		.command = XDP_SETUP_PROG,
+		.flags   = 0,
+		.prog    = prog,
+		.extack  = extack,
+	};
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!bond_xdp_check(bond))
+		return -EOPNOTSUPP;
+
+	old_prog = bond->xdp_prog;
+	bond->xdp_prog = prog;
+
+	bond_for_each_slave(bond, slave, iter) {
+		struct net_device *slave_dev = slave->dev;
+
+		if (!slave_dev->netdev_ops->ndo_bpf ||
+		    !slave_dev->netdev_ops->ndo_xdp_xmit) {
+			NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+			slave_err(dev, slave_dev, "Slave does not support XDP\n");
+			err = -EOPNOTSUPP;
+			goto err;
+		}
+
+		if (dev_xdp_prog_count(slave_dev) > 0) {
+			NL_SET_ERR_MSG(extack,
+				       "Slave has XDP program loaded, please unload before enslaving");
+			slave_err(dev, slave_dev,
+				  "Slave has XDP program loaded, please unload before enslaving\n");
+			err = -EOPNOTSUPP;
+			goto err;
+		}
+
+		err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+		if (err < 0) {
+			/* ndo_bpf() sets extack error message */
+			slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+			goto err;
+		}
+		if (prog)
+			bpf_prog_inc(prog);
+	}
+
+	if (prog) {
+		static_branch_inc(&bpf_master_redirect_enabled_key);
+	} else if (old_prog) {
+		bpf_prog_put(old_prog);
+		static_branch_dec(&bpf_master_redirect_enabled_key);
+	}
+
+	return 0;
+
+err:
+	/* unwind the program changes */
+	bond->xdp_prog = old_prog;
+	xdp.prog = old_prog;
+	xdp.extack = NULL; /* do not overwrite original error */
+
+	bond_for_each_slave(bond, rollback_slave, iter) {
+		struct net_device *slave_dev = rollback_slave->dev;
+		int err_unwind;
+
+		if (slave == rollback_slave)
+			break;
+
+		err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+		if (err_unwind < 0)
+			slave_err(dev, slave_dev,
+				  "Error %d when unwinding XDP program change\n", err_unwind);
+		else if (xdp.prog)
+			bpf_prog_inc(xdp.prog);
+	}
+	return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return bond_xdp_set(dev, xdp->prog, xdp->extack);
+	default:
+		return -EINVAL;
+	}
+}
+
 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
 {
 	if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -4911,6 +5246,9 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_fix_features	= bond_fix_features,
 	.ndo_features_check	= passthru_features_check,
 	.ndo_get_xmit_slave	= bond_xmit_get_slave,
+	.ndo_bpf		= bond_xdp,
+	.ndo_xdp_xmit		= bond_xdp_xmit,
+	.ndo_xdp_get_xmit_slave	= bond_xdp_get_xmit_slave,
 };
 
 static const struct device_type bond_type = {
@@ -4922,6 +5260,9 @@ static void bond_destructor(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
+
+	if (bond->rr_tx_counter)
+		free_percpu(bond->rr_tx_counter);
 }
 
 void bond_setup(struct net_device *bond_dev)
......
@@ -747,6 +747,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 
 DECLARE_BPF_DISPATCHER(xdp)
 
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 					    struct xdp_buff *xdp)
 {
@@ -756,7 +760,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 	 * already takes rcu_read_lock() when fetching the program, so
 	 * it's not necessary here anymore.
 	 */
-	return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+	u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+		if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+			act = xdp_master_redirect(xdp);
+	}
+
+	return act;
 }
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
......
@@ -1301,6 +1301,9 @@ struct netdev_net_notifier {
  *	that got dropped are freed/returned via xdp_return_frame().
  *	Returns negative number, means general error invoking ndo, meaning
  *	no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ *					  struct xdp_buff *xdp);
+ *	Get the xmit slave of master device based on the xdp_buff.
  * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
  *	This function is used to wake up the softirq, ksoftirqd or kthread
  *	responsible for sending and/or receiving packets on a specific
@@ -1526,7 +1529,8 @@ struct net_device_ops {
 						struct ip_tunnel_parm *p, int cmd);
 	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 
-	KABI_RESERVE(1)
+	KABI_USE(1, struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+								  struct xdp_buff *xdp))
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
@@ -3994,6 +3998,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, int expected_fd, u32 flags);
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 int xdp_umem_query(struct net_device *dev, u16 queue_id);
......
@@ -237,7 +237,7 @@ struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	u32      rr_tx_counter;
+	u32 __percpu *rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;
@@ -258,6 +258,7 @@ struct bonding {
 	/* protecting ipsec_list */
 	spinlock_t ipsec_lock;
 #endif /* CONFIG_XFRM_OFFLOAD */
+	struct bpf_prog *xdp_prog;
 };
 
 #define bond_slave_get_rcu(dev) \
......
@@ -9037,7 +9037,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
 	return dev->xdp_state[mode].prog;
 }
 
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
 {
 	u8 count = 0;
 	int i;
@@ -9047,6 +9047,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
 			count++;
 	return count;
 }
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -9140,6 +9141,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 {
 	unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
 	struct bpf_prog *cur_prog;
+	struct net_device *upper;
+	struct list_head *iter;
 	enum bpf_xdp_mode mode;
 	bpf_op_t bpf_op;
 	int err;
@@ -9178,6 +9181,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 		return -EBUSY;
 	}
 
+	/* don't allow if an upper device already has a program */
+	netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+		if (dev_xdp_prog_count(upper) > 0) {
+			NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+			return -EEXIST;
+		}
+	}
+
 	cur_prog = dev_xdp_prog(dev, mode);
 	/* can't replace attached prog with link */
 	if (link && cur_prog) {
......
@@ -3990,6 +3990,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+	struct net_device *master, *slave;
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+	slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+	if (slave && slave != xdp->rxq->dev) {
+		/* The target device is different from the receiving device, so
+		 * redirect it to the new device.
+		 * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+		 * drivers to unmap the packet from their rx ring.
+		 */
+		ri->tgt_index = slave->ifindex;
+		/* ri->map_id = INT_MAX;
+		 * ri->map_type = BPF_MAP_TYPE_UNSPEC;
+		 * TODO: decide how these fields should be handled in this backport.
+		 */
+		return XDP_REDIRECT;
+	}
+
+	return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
......
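
To see the bpf_prog_run_xdp() hook above end to end: a program as small
as the sketch below, attached to the bond master, returns XDP_TX when it
runs on a slave's receive queue, and xdp_master_redirect() transparently
turns that into an XDP_REDIRECT toward the slave chosen by
ndo_xdp_get_xmit_slave(). This is the kind of program behind the XDP_TX
numbers in the cover letter; the program name is illustrative, and a
real forwarder would also swap the Ethernet addresses before XDP_TX.

/* Hedged sketch of a minimal XDP program for the bond master.
 * On a bond slave's rx queue, the XDP_TX verdict below is rewritten to
 * XDP_REDIRECT by xdp_master_redirect() per the bond's xmit policy.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_bond_tx(struct xdp_md *ctx)
{
	/* Reflect every packet back out; no header rewrite for brevity. */
	return XDP_TX;
}

char _license[] SEC("license") = "GPL";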