提交 df012169 编写于 作者: D David S. Miller

Merge branch 'bonding_rcu'

Ding Tianhong says:

====================
bonding: rebuild the lock use for bond monitor

Now the bond slave list is not protected by bond lock, only by RTNL,
but the monitor still use the bond lock to protect the slave list,
it is useless, according to the Veaceslav's opinion, there were
three way to fix the protect problem:

1. add bond_master_upper_dev_link() and bond_upper_dev_unlink()
   in bond->lock, but it is unsafe to call call_netdevice_notifiers()
   in write lock.
2. remove unused bond->lock for monitor function, only use the exist
   rtnl lock(), it will take performance loss in fast path.
3. use RCU to protect the slave list, of course, performance is better,
   but in slow path, it is ignored.

obviously the solution 1 is not fit here, I will consider the 2 and 3
solution. My principle is simple, if in fast path, RCU is better,
otherwise in slow path, both is well, but according to the Jay Vosburgh's
opinion, the monitor will loss performace if use RTNL to protect the all
slave list, so remove the bond lock and replace with RCU.

The second problem is the curr_slave_lock for bond, it is too old and
unwanted in many place, because the curr_active_slave would only be
changed in 3 place:

1. enslave slave.
2. release slave.
3. change active slave.

all above were already holding bond lock, RTNL and curr_slave_lock
together, it is tedious and no need to add so mach lock, when change
the curr_active_slave, you have to hold the RTNL and curr_slave_lock
together, and when you read the curr_active_slave, RTNL or curr_slave_lock,
any one of them is no problem.

for the stability, I did not change the logic for the monitor,
all change is clear and simple, I have test the patch set for lockdep,
it work well and stability.

v2. accept the Jay Vosburgh's opinion, remove the RTNL and replace with RCU,
    also add some rcu function for bond use, so the patch set reach 10.

v3. accept the Nikolay Aleksandrov's opinion, remove no needed bond_has_slave_rcu(),
    add protection for several 3ad mode handler functions and current_arp_slave.
    rebuild the bond_first_slave_rcu(), make it more clear.

v4. because the struct netdev_adjacent should not be exist in netdevice.h, so I have
    to make a new function to support micro bond_first_slave_rcu().
    also add a new patch to simplify the bond_resend_igmp_join_requests_delayed().

v5. according the Jay Vosburgh's opinion, in patch 2 and 6, the calling of notify
    peer is hardly to happen with the bond_xxx_commit() when the monitoring is running,
    so the performance impact about make two round trips to one trip on RTNL is minimal,
    no need to do that,the reason is very clear, so modify the patch 2 and 6, recover
    the notify peer in RTNL alone.
====================
Signed-off-by: NJay Vosburgh <fubar@us.ibm.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -147,11 +147,12 @@ static inline struct aggregator *__get_first_agg(struct port *port)
struct bonding *bond = __get_bond_by_port(port);
struct slave *first_slave;
// If there's no bond for this port, or bond has no slaves
/* If there's no bond for this port, or bond has no slaves */
if (bond == NULL)
return NULL;
first_slave = bond_first_slave(bond);
rcu_read_lock();
first_slave = bond_first_slave_rcu(bond);
rcu_read_unlock();
return first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL;
}
......@@ -702,9 +703,13 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator)
struct list_head *iter;
struct slave *slave;
bond_for_each_slave(bond, slave, iter)
if (SLAVE_AD_INFO(slave).aggregator.is_active)
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter)
if (SLAVE_AD_INFO(slave).aggregator.is_active) {
rcu_read_unlock();
return &(SLAVE_AD_INFO(slave).aggregator);
}
rcu_read_unlock();
return NULL;
}
......@@ -1471,7 +1476,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
active = __get_active_agg(agg);
best = (active && agg_device_up(active)) ? active : NULL;
bond_for_each_slave(bond, slave, iter) {
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter) {
agg = &(SLAVE_AD_INFO(slave).aggregator);
agg->is_active = 0;
......@@ -1505,7 +1511,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
active->is_active = 1;
}
// if there is new best aggregator, activate it
/* if there is new best aggregator, activate it */
if (best) {
pr_debug("best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
best->aggregator_identifier, best->num_of_ports,
......@@ -1516,7 +1522,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
best->lag_ports, best->slave,
best->slave ? best->slave->dev->name : "NULL");
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
agg = &(SLAVE_AD_INFO(slave).aggregator);
pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
......@@ -1526,10 +1532,11 @@ static void ad_agg_selection_logic(struct aggregator *agg)
agg->is_individual, agg->is_active);
}
// check if any partner replys
/* check if any partner replys */
if (best->is_individual) {
pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n",
best->slave ? best->slave->bond->dev->name : "NULL");
best->slave ?
best->slave->bond->dev->name : "NULL");
}
best->is_active = 1;
......@@ -1541,7 +1548,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
best->partner_oper_aggregator_key,
best->is_individual, best->is_active);
// disable the ports that were related to the former active_aggregator
/* disable the ports that were related to the former active_aggregator */
if (active) {
for (port = active->lag_ports; port;
port = port->next_port_in_aggregator) {
......@@ -1565,6 +1572,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
}
}
rcu_read_unlock();
bond_3ad_set_carrier(bond);
}
......@@ -2069,17 +2078,18 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
struct port *port;
read_lock(&bond->lock);
rcu_read_lock();
//check if there are any slaves
/* check if there are any slaves */
if (!bond_has_slaves(bond))
goto re_arm;
// check if agg_select_timer timer after initialize is timed out
/* check if agg_select_timer timer after initialize is timed out */
if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) {
slave = bond_first_slave(bond);
slave = bond_first_slave_rcu(bond);
port = slave ? &(SLAVE_AD_INFO(slave).port) : NULL;
// select the active aggregator for the bond
/* select the active aggregator for the bond */
if (port) {
if (!port->slave) {
pr_warning("%s: Warning: bond's first port is uninitialized\n",
......@@ -2093,8 +2103,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
bond_3ad_set_carrier(bond);
}
// for each port run the state machines
bond_for_each_slave(bond, slave, iter) {
/* for each port run the state machines */
bond_for_each_slave_rcu(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave).port);
if (!port->slave) {
pr_warning("%s: Warning: Found an uninitialized port\n",
......@@ -2114,7 +2124,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
ad_mux_machine(port);
ad_tx_machine(port);
// turn off the BEGIN bit, since we already handled it
/* turn off the BEGIN bit, since we already handled it */
if (port->sm_vars & AD_PORT_BEGIN)
port->sm_vars &= ~AD_PORT_BEGIN;
......@@ -2122,9 +2132,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
}
re_arm:
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
rcu_read_unlock();
read_unlock(&bond->lock);
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
}
/**
......@@ -2303,7 +2313,9 @@ int bond_3ad_set_carrier(struct bonding *bond)
struct aggregator *active;
struct slave *first_slave;
first_slave = bond_first_slave(bond);
rcu_read_lock();
first_slave = bond_first_slave_rcu(bond);
rcu_read_unlock();
if (!first_slave)
return 0;
active = __get_active_agg(&(SLAVE_AD_INFO(first_slave).aggregator));
......
......@@ -469,7 +469,7 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
/* slave being removed should not be active at this point
*
* Caller must hold bond lock for read
* Caller must hold rtnl.
*/
static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
{
......@@ -815,7 +815,7 @@ static void rlb_rebalance(struct bonding *bond)
for (; hash_index != RLB_NULL_INDEX;
hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
assigned_slave = rlb_next_rx_slave(bond);
assigned_slave = __rlb_next_rx_slave(bond);
if (assigned_slave && (client_info->slave != assigned_slave)) {
client_info->slave = assigned_slave;
client_info->ntt = 1;
......@@ -1494,14 +1494,14 @@ void bond_alb_monitor(struct work_struct *work)
struct list_head *iter;
struct slave *slave;
read_lock(&bond->lock);
if (!bond_has_slaves(bond)) {
bond_info->tx_rebalance_counter = 0;
bond_info->lp_counter = 0;
goto re_arm;
}
rcu_read_lock();
bond_info->tx_rebalance_counter++;
bond_info->lp_counter++;
......@@ -1514,7 +1514,7 @@ void bond_alb_monitor(struct work_struct *work)
*/
read_lock(&bond->curr_slave_lock);
bond_for_each_slave(bond, slave, iter)
bond_for_each_slave_rcu(bond, slave, iter)
alb_send_learning_packets(slave, slave->dev->dev_addr);
read_unlock(&bond->curr_slave_lock);
......@@ -1527,7 +1527,7 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->curr_slave_lock);
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
tlb_clear_slave(bond, slave, 1);
if (slave == bond->curr_active_slave) {
SLAVE_TLB_INFO(slave).load =
......@@ -1551,11 +1551,9 @@ void bond_alb_monitor(struct work_struct *work)
* dev_set_promiscuity requires rtnl and
* nothing else. Avoid race with bond_close.
*/
read_unlock(&bond->lock);
if (!rtnl_trylock()) {
read_lock(&bond->lock);
rcu_read_unlock();
if (!rtnl_trylock())
goto re_arm;
}
bond_info->rlb_promisc_timeout_counter = 0;
......@@ -1567,7 +1565,7 @@ void bond_alb_monitor(struct work_struct *work)
bond_info->primary_is_promisc = 0;
rtnl_unlock();
read_lock(&bond->lock);
rcu_read_lock();
}
if (bond_info->rlb_rebalance) {
......@@ -1589,11 +1587,9 @@ void bond_alb_monitor(struct work_struct *work)
}
}
}
rcu_read_unlock();
re_arm:
queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
read_unlock(&bond->lock);
}
/* assumption: called before the slave is attached to the bond
......@@ -1679,14 +1675,11 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
* If new_slave is NULL, caller must hold curr_slave_lock or
* bond->lock for write.
*
* If new_slave is not NULL, caller must hold RTNL, bond->lock for
* read and curr_slave_lock for write. Processing here may sleep, so
* no other locks may be held.
* If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
* for write. Processing here may sleep, so no other locks may be held.
*/
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
__releases(&bond->curr_slave_lock)
__releases(&bond->lock)
__acquires(&bond->lock)
__acquires(&bond->curr_slave_lock)
{
struct slave *swap_slave;
......@@ -1722,7 +1715,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
tlb_clear_slave(bond, new_slave, 1);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
ASSERT_RTNL();
......@@ -1748,11 +1740,9 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
/* swap mac address */
alb_swap_mac_addr(swap_slave, new_slave);
alb_fasten_mac_swap(bond, swap_slave, new_slave);
read_lock(&bond->lock);
} else {
/* set the new_slave to the bond mac address */
alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr);
read_lock(&bond->lock);
alb_send_learning_packets(new_slave, bond->dev->dev_addr);
}
......
......@@ -591,33 +591,22 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
* device and retransmit an IGMP JOIN request to the current active
* slave.
*/
static void bond_resend_igmp_join_requests(struct bonding *bond)
static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
mcast_work.work);
if (!rtnl_trylock()) {
queue_delayed_work(bond->wq, &bond->mcast_work, 1);
return;
}
call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev);
rtnl_unlock();
/* We use curr_slave_lock to protect against concurrent access to
* igmp_retrans from multiple running instances of this function and
* bond_change_active_slave
*/
write_lock_bh(&bond->curr_slave_lock);
if (bond->igmp_retrans > 1) {
bond->igmp_retrans--;
queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
}
write_unlock_bh(&bond->curr_slave_lock);
}
static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
mcast_work.work);
bond_resend_igmp_join_requests(bond);
rtnl_unlock();
}
/* Flush bond's hardware addresses from slave
......@@ -697,14 +686,12 @@ static void bond_set_dev_addr(struct net_device *bond_dev,
*
* Perform special MAC address swapping for fail_over_mac settings
*
* Called with RTNL, bond->lock for read, curr_slave_lock for write_bh.
* Called with RTNL, curr_slave_lock for write_bh.
*/
static void bond_do_fail_over_mac(struct bonding *bond,
struct slave *new_active,
struct slave *old_active)
__releases(&bond->curr_slave_lock)
__releases(&bond->lock)
__acquires(&bond->lock)
__acquires(&bond->curr_slave_lock)
{
u8 tmp_mac[ETH_ALEN];
......@@ -715,9 +702,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
case BOND_FOM_ACTIVE:
if (new_active) {
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
bond_set_dev_addr(bond->dev, new_active->dev);
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
}
break;
......@@ -731,7 +716,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,
return;
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
if (old_active) {
memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN);
......@@ -761,7 +745,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,
pr_err("%s: Error %d setting MAC of slave %s\n",
bond->dev->name, -rv, new_active->dev->name);
out:
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
break;
default:
......@@ -821,7 +804,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
static bool bond_should_notify_peers(struct bonding *bond)
{
struct slave *slave = bond->curr_active_slave;
struct slave *slave;
rcu_read_lock();
slave = rcu_dereference(bond->curr_active_slave);
rcu_read_unlock();
pr_debug("bond_should_notify_peers: bond %s slave %s\n",
bond->dev->name, slave ? slave->dev->name : "NULL");
......@@ -846,8 +833,7 @@ static bool bond_should_notify_peers(struct bonding *bond)
* because it is apparently the best available slave we have, even though its
* updelay hasn't timed out yet.
*
* If new_active is not NULL, caller must hold bond->lock for read and
* curr_slave_lock for write_bh.
* If new_active is not NULL, caller must hold curr_slave_lock for write_bh.
*/
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
......@@ -916,14 +902,12 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
}
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
if (should_notify_peers)
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
}
}
......@@ -949,7 +933,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
* - The primary_slave has got its link back.
* - A slave has got its link back and there's no old curr_active_slave.
*
* Caller must hold bond->lock for read and curr_slave_lock for write_bh.
* Caller must hold curr_slave_lock for write_bh.
*/
void bond_select_active_slave(struct bonding *bond)
{
......@@ -1594,11 +1578,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_set_carrier(bond);
if (USES_PRIMARY(bond->params.mode)) {
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
}
pr_info("%s: enslaving %s as a%s interface with a%s link.\n",
......@@ -1618,19 +1600,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_hw_addr_flush(bond_dev, slave_dev);
vlan_vids_del_by_dev(slave_dev, bond_dev);
write_lock_bh(&bond->lock);
if (bond->primary_slave == new_slave)
bond->primary_slave = NULL;
if (bond->curr_active_slave == new_slave) {
bond_change_active_slave(bond, NULL);
write_unlock_bh(&bond->lock);
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, NULL);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
} else {
write_unlock_bh(&bond->lock);
}
slave_disable_netpoll(new_slave);
......@@ -1695,20 +1671,16 @@ static int __bond_release_one(struct net_device *bond_dev,
}
block_netpoll_tx();
write_lock_bh(&bond->lock);
slave = bond_get_slave_by_dev(bond, slave_dev);
if (!slave) {
/* not a slave of this bond */
pr_info("%s: %s not enslaved\n",
bond_dev->name, slave_dev->name);
write_unlock_bh(&bond->lock);
unblock_netpoll_tx();
return -EINVAL;
}
write_unlock_bh(&bond->lock);
/* release the slave from its bond */
bond->slave_cnt--;
......@@ -1720,12 +1692,10 @@ static int __bond_release_one(struct net_device *bond_dev,
write_lock_bh(&bond->lock);
/* Inform AD package of unbinding of slave. */
if (bond->params.mode == BOND_MODE_8023AD) {
/* must be called before the slave is
* detached from the list
*/
if (bond->params.mode == BOND_MODE_8023AD)
bond_3ad_unbind_slave(slave);
}
write_unlock_bh(&bond->lock);
pr_info("%s: releasing %s interface %s\n",
bond_dev->name,
......@@ -1748,8 +1718,11 @@ static int __bond_release_one(struct net_device *bond_dev,
if (bond->primary_slave == slave)
bond->primary_slave = NULL;
if (oldcurrent == slave)
if (oldcurrent == slave) {
write_lock_bh(&bond->curr_slave_lock);
bond_change_active_slave(bond, NULL);
write_unlock_bh(&bond->curr_slave_lock);
}
if (bond_is_lb(bond)) {
/* Must be called only after the slave has been
......@@ -1757,9 +1730,7 @@ static int __bond_release_one(struct net_device *bond_dev,
* has been cleared (if our_slave == old_current),
* but before a new active slave is selected.
*/
write_unlock_bh(&bond->lock);
bond_alb_deinit_slave(bond, slave);
write_lock_bh(&bond->lock);
}
if (all) {
......@@ -1770,15 +1741,11 @@ static int __bond_release_one(struct net_device *bond_dev,
* is no concern that another slave add/remove event
* will interfere.
*/
write_unlock_bh(&bond->lock);
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
write_lock_bh(&bond->lock);
}
if (!bond_has_slaves(bond)) {
......@@ -1793,7 +1760,6 @@ static int __bond_release_one(struct net_device *bond_dev,
}
}
write_unlock_bh(&bond->lock);
unblock_netpoll_tx();
synchronize_rcu();
......@@ -1928,7 +1894,7 @@ static int bond_miimon_inspect(struct bonding *bond)
ignore_updelay = !bond->curr_active_slave ? true : false;
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
slave->new_link = BOND_LINK_NOCHANGE;
link_state = bond_check_dev_link(bond, slave->dev, 0);
......@@ -2126,41 +2092,35 @@ void bond_mii_monitor(struct work_struct *work)
bool should_notify_peers = false;
unsigned long delay;
read_lock(&bond->lock);
delay = msecs_to_jiffies(bond->params.miimon);
if (!bond_has_slaves(bond))
goto re_arm;
rcu_read_lock();
should_notify_peers = bond_should_notify_peers(bond);
if (bond_miimon_inspect(bond)) {
read_unlock(&bond->lock);
rcu_read_unlock();
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
read_lock(&bond->lock);
delay = 1;
should_notify_peers = false;
goto re_arm;
}
read_lock(&bond->lock);
bond_miimon_commit(bond);
read_unlock(&bond->lock);
rtnl_unlock(); /* might sleep, hold no other locks */
read_lock(&bond->lock);
}
} else
rcu_read_unlock();
re_arm:
if (bond->params.miimon)
queue_delayed_work(bond->wq, &bond->mii_work, delay);
read_unlock(&bond->lock);
if (should_notify_peers) {
if (!rtnl_trylock())
return;
......@@ -2422,12 +2382,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
struct list_head *iter;
int do_failover = 0;
read_lock(&bond->lock);
if (!bond_has_slaves(bond))
goto re_arm;
oldcurrent = bond->curr_active_slave;
rcu_read_lock();
oldcurrent = ACCESS_ONCE(bond->curr_active_slave);
/* see if any of the previous devices are up now (i.e. they have
* xmt and rcv traffic). the curr_active_slave does not come into
* the picture unless it is null. also, slave->jiffies is not needed
......@@ -2436,7 +2396,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
* TODO: what about up/down delay in arp mode? it wasn't here before
* so it can wait
*/
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
unsigned long trans_start = dev_trans_start(slave->dev);
if (slave->link != BOND_LINK_UP) {
......@@ -2498,7 +2458,14 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
bond_arp_send_all(bond, slave);
}
rcu_read_unlock();
if (do_failover) {
/* the bond_select_active_slave must hold RTNL
* and curr_slave_lock for write.
*/
if (!rtnl_trylock())
goto re_arm;
block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
......@@ -2506,14 +2473,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
write_unlock_bh(&bond->curr_slave_lock);
unblock_netpoll_tx();
rtnl_unlock();
}
re_arm:
if (bond->params.arp_interval)
queue_delayed_work(bond->wq, &bond->arp_work,
msecs_to_jiffies(bond->params.arp_interval));
read_unlock(&bond->lock);
}
/*
......@@ -2522,7 +2488,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
* place for the slave. Returns 0 if no changes are found, >0 if changes
* to link states must be committed.
*
* Called with bond->lock held for read.
* Called with rcu_read_lock hold.
*/
static int bond_ab_arp_inspect(struct bonding *bond)
{
......@@ -2531,7 +2497,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
struct slave *slave;
int commit = 0;
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
slave->new_link = BOND_LINK_NOCHANGE;
last_rx = slave_last_rx(bond, slave);
......@@ -2593,7 +2559,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
* Called to commit link state changes noted by inspection step of
* active-backup mode ARP monitor.
*
* Called with RTNL and bond->lock for read.
* Called with RTNL hold.
*/
static void bond_ab_arp_commit(struct bonding *bond)
{
......@@ -2668,19 +2634,20 @@ static void bond_ab_arp_commit(struct bonding *bond)
/*
* Send ARP probes for active-backup mode ARP monitor.
*
* Called with bond->lock held for read.
* Called with rcu_read_lock hold.
*/
static void bond_ab_arp_probe(struct bonding *bond)
{
struct slave *slave, *before = NULL, *new_slave = NULL;
struct slave *slave, *before = NULL, *new_slave = NULL,
*curr_arp_slave = rcu_dereference(bond->current_arp_slave);
struct list_head *iter;
bool found = false;
read_lock(&bond->curr_slave_lock);
if (bond->current_arp_slave && bond->curr_active_slave)
if (curr_arp_slave && bond->curr_active_slave)
pr_info("PROBE: c_arp %s && cas %s BAD\n",
bond->current_arp_slave->dev->name,
curr_arp_slave->dev->name,
bond->curr_active_slave->dev->name);
if (bond->curr_active_slave) {
......@@ -2696,15 +2663,15 @@ static void bond_ab_arp_probe(struct bonding *bond)
* for becoming the curr_active_slave
*/
if (!bond->current_arp_slave) {
bond->current_arp_slave = bond_first_slave(bond);
if (!bond->current_arp_slave)
if (!curr_arp_slave) {
curr_arp_slave = bond_first_slave_rcu(bond);
if (!curr_arp_slave)
return;
}
bond_set_slave_inactive_flags(bond->current_arp_slave);
bond_set_slave_inactive_flags(curr_arp_slave);
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
if (!found && !before && IS_UP(slave->dev))
before = slave;
......@@ -2727,7 +2694,7 @@ static void bond_ab_arp_probe(struct bonding *bond)
pr_info("%s: backup interface %s is now down.\n",
bond->dev->name, slave->dev->name);
}
if (slave == bond->current_arp_slave)
if (slave == curr_arp_slave)
found = true;
}
......@@ -2741,8 +2708,7 @@ static void bond_ab_arp_probe(struct bonding *bond)
bond_set_slave_active_flags(new_slave);
bond_arp_send_all(bond, new_slave);
new_slave->jiffies = jiffies;
bond->current_arp_slave = new_slave;
rcu_assign_pointer(bond->current_arp_slave, new_slave);
}
void bond_activebackup_arp_mon(struct work_struct *work)
......@@ -2752,43 +2718,38 @@ void bond_activebackup_arp_mon(struct work_struct *work)
bool should_notify_peers = false;
int delta_in_ticks;
read_lock(&bond->lock);
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
if (!bond_has_slaves(bond))
goto re_arm;
rcu_read_lock();
should_notify_peers = bond_should_notify_peers(bond);
if (bond_ab_arp_inspect(bond)) {
read_unlock(&bond->lock);
rcu_read_unlock();
/* Race avoidance with bond_close flush of workqueue */
if (!rtnl_trylock()) {
read_lock(&bond->lock);
delta_in_ticks = 1;
should_notify_peers = false;
goto re_arm;
}
read_lock(&bond->lock);
bond_ab_arp_commit(bond);
read_unlock(&bond->lock);
rtnl_unlock();
read_lock(&bond->lock);
rcu_read_lock();
}
bond_ab_arp_probe(bond);
rcu_read_unlock();
re_arm:
if (bond->params.arp_interval)
queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
read_unlock(&bond->lock);
if (should_notify_peers) {
if (!rtnl_trylock())
return;
......
......@@ -107,7 +107,6 @@ int bond_option_active_slave_set(struct bonding *bond,
}
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
/* check to see if we are clearing active */
......@@ -142,7 +141,6 @@ int bond_option_active_slave_set(struct bonding *bond,
}
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
return ret;
}
......
......@@ -878,7 +878,6 @@ static ssize_t bonding_store_primary(struct device *d,
if (!rtnl_trylock())
return restart_syscall();
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
if (!USES_PRIMARY(bond->params.mode)) {
......@@ -918,7 +917,6 @@ static ssize_t bonding_store_primary(struct device *d,
bond->dev->name, ifname, bond->dev->name);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
rtnl_unlock();
......@@ -966,11 +964,9 @@ static ssize_t bonding_store_primary_reselect(struct device *d,
new_value);
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
out:
rtnl_unlock();
......
......@@ -101,6 +101,10 @@
netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
NULL)
/* Caller must have rcu_read_lock */
#define bond_first_slave_rcu(bond) \
netdev_lower_get_first_private_rcu(bond->dev)
#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
......
......@@ -2907,6 +2907,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
void *netdev_adjacent_get_private(struct list_head *adj_list);
void *netdev_lower_get_first_private_rcu(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
......
......@@ -4543,6 +4543,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
* @dev: device
*
* Gets the first netdev_adjacent->private from the dev's lower neighbour
* list. The caller must hold RCU read lock.
*/
void *netdev_lower_get_first_private_rcu(struct net_device *dev)
{
struct netdev_adjacent *lower;
lower = list_first_or_null_rcu(&dev->adj_list.lower,
struct netdev_adjacent, list);
if (lower)
return lower->private;
return NULL;
}
EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册