From de77ecd4ef02ca783f7762e04e92b3d0964be66b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:33 -0700 Subject: [PATCH] bonding: improve link-status update in mii-monitoring The primary issue is that mii-inspect phase updates link-state and expects changes to be committed during the mii-commit phase. After the inspect phase if it fails to acquire rtnl-mutex, the commit phase (bond_mii_commit) doesn't get to run. This partially updated state stays and makes the internal-state inconsistent. e.g. setup bond0 => slaves: eth1, eth2 eth1 goes DOWN -> UP mii_monitor() mii-inspect() bond_set_slave_link_state(eth1, UP, DontNotify) rtnl_trylock() <- fails! Next mii-monitor round eth1: No change mii_monitor() mii-inspect() eth1->link == current-status (ethtool_ops->get_link) no-change-detected End result: eth1: Link = BOND_LINK_UP Speed = 0xfffff [SpeedUnknown] Duplex = 0xff [DuplexUnknown] This doesn't always happen but for some unlucky machines in a large set of machines it creates problems. The fix for this is to avoid making changes during inspect phase and postpone them until acquiring the rtnl-mutex / invoking commit phase. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ba934020dfaa..85999e479916 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2033,8 +2033,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_FAIL, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_FAIL); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -2049,8 +2048,7 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - bond_set_slave_link_state(slave, BOND_LINK_UP, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -2072,8 +2070,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_BACK, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_BACK); slave->delay = bond->params.updelay; if (slave->delay) { @@ -2086,9 +2083,7 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_BACK: if (!link_state) { - bond_set_slave_link_state(slave, - BOND_LINK_DOWN, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_DOWN); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -2225,6 +2220,8 @@ static void bond_mii_monitor(struct work_struct *work) mii_work.work); bool should_notify_peers = false; unsigned long delay; + struct slave *slave; + struct list_head *iter; delay = msecs_to_jiffies(bond->params.miimon); @@ -2245,6 +2242,9 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm; } + bond_for_each_slave(bond, slave, iter) { + bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); + } bond_miimon_commit(bond); rtnl_unlock(); /* might sleep, hold no other locks */ -- GitLab