提交 b0f80ad2 编写于 作者: J Junxian Huang 提交者: ZhouJuan

RDMA/hns: Support reset recovery for RoCE bonding

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I6Z4E9

---------------------------------------------------------------

Currently, a RoCE bond device cannot be recovered to a bond
device after reset.

Applying this patch, RoCE bonding device 'hns_bond_xx' can be recovered
after reset, including the following changes:
   1. Modify the condition for judging whether bond_grp is active, as the
      bond_grp may also be holding HNS_ROCE_CHANGE_BOND during reset init.
      Thus, as long as the bond_grp's state is not
      HNS_ROCE_BOND_NOT_BONDED, it should be considered active.
   2. Update the link status of the slaves in bond_grp from the NIC bonding
      driver right before sending the command to firmware, as the RoCE
      driver is uninitialized for a while during the reset process, and
      during this period bond_grp cannot update the information.
   3. After the reset, reconfigure the bond_grp information to the firmware,
      as the firmware is also reset and the previous configuration is
      cleared.
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
上级 01c810c8
...@@ -101,9 +101,7 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) ...@@ -101,9 +101,7 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
{ {
struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev);
if (bond_grp && if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED)
(bond_grp->bond_state == HNS_ROCE_BOND_REGISTERING ||
bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED))
return true; return true;
return false; return false;
...@@ -155,6 +153,7 @@ static void hns_roce_queue_bond_work(struct hns_roce_bond_group *bond_grp, ...@@ -155,6 +153,7 @@ static void hns_roce_queue_bond_work(struct hns_roce_bond_group *bond_grp,
static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
{ {
struct netdev_lag_lower_state_info *state;
struct net_device *net_dev; struct net_device *net_dev;
u32 active_slave_map = 0; u32 active_slave_map = 0;
u8 active_slave_num = 0; u8 active_slave_num = 0;
...@@ -163,14 +162,27 @@ static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) ...@@ -163,14 +162,27 @@ static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev; net_dev = bond_grp->bond_func_info[i].net_dev;
if (net_dev) { state = &bond_grp->bond_func_info[i].state;
active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ? if (!net_dev)
bond_grp->bond_func_info[i].state.tx_enabled : continue;
bond_grp->bond_func_info[i].state.link_up;
if (active) { state->tx_enabled = (bond_grp->bond->curr_active_slave &&
active_slave_num++; (net_dev == bond_grp->bond->curr_active_slave->dev)) ?
active_slave_map |= (1 << i); 1 : 0;
} state->link_up =
(get_port_state(net_dev) == IB_PORT_ACTIVE) ? 1 : 0;
/*
* For bond mode 1(active-backup), only the tx-enabled slave is
* considered active. For other bond mode, all the link-up
* slaves are considered active.
*/
active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
bond_grp->bond_func_info[i].state.tx_enabled :
bond_grp->bond_func_info[i].state.link_up;
if (active) {
active_slave_num++;
active_slave_map |= (1 << i);
} }
} }
...@@ -178,6 +190,13 @@ static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) ...@@ -178,6 +190,13 @@ static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
bond_grp->active_slave_map = active_slave_map; bond_grp->active_slave_map = active_slave_map;
} }
/*
 * Re-apply the bond configuration of @bond_grp: first refresh the group's
 * active-slave information with hns_roce_bond_get_active_slave(), then issue
 * HNS_ROCE_SET_BOND through hns_roce_cmd_bond().
 *
 * Called from hns_roce_bond_init() when the reset state is
 * HNS_ROCE_STATE_RST_INIT and a bond group already exists.
 *
 * Return: the value returned by hns_roce_cmd_bond().
 */
static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp)
{
hns_roce_bond_get_active_slave(bond_grp);
return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
}
static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
{ {
struct hns_roce_dev *hr_dev = NULL; struct hns_roce_dev *hr_dev = NULL;
...@@ -337,6 +356,9 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) ...@@ -337,6 +356,9 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp)
enum hns_roce_bond_state bond_state = bond_grp->bond_state; enum hns_roce_bond_state bond_state = bond_grp->bond_state;
bool bond_ready = bond_grp->bond_ready; bool bond_ready = bond_grp->bond_ready;
if (!bond_grp->main_hr_dev)
return;
ibdev_info(&bond_grp->main_hr_dev->ib_dev, ibdev_info(&bond_grp->main_hr_dev->ib_dev,
"do_bond: bond_ready - %d, bond_state - %d.\n", "do_bond: bond_ready - %d, bond_state - %d.\n",
bond_ready, bond_grp->bond_state); bond_ready, bond_grp->bond_state);
...@@ -374,13 +396,29 @@ void hns_roce_do_bond_work(struct work_struct *work) ...@@ -374,13 +396,29 @@ void hns_roce_do_bond_work(struct work_struct *work)
int hns_roce_bond_init(struct hns_roce_dev *hr_dev) int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
{ {
struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev);
struct hns_roce_v2_priv *priv = hr_dev->priv;
int ret; int ret;
if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT &&
bond_grp) {
bond_grp->main_hr_dev = hr_dev;
ret = hns_roce_recover_bond(bond_grp);
if (ret) {
ibdev_err(&hr_dev->ib_dev,
"failed to recover RoCE bond, ret = %d.\n",
ret);
return ret;
}
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
}
hr_dev->bond_nb.notifier_call = hns_roce_bond_event; hr_dev->bond_nb.notifier_call = hns_roce_bond_event;
ret = register_netdevice_notifier(&hr_dev->bond_nb); ret = register_netdevice_notifier(&hr_dev->bond_nb);
if (ret) { if (ret) {
ibdev_err(&hr_dev->ib_dev, ibdev_err(&hr_dev->ib_dev,
"failed to register notifier for RoCE bond!\n"); "failed to register notifier for RoCE bond, ret = %d.\n",
ret);
hr_dev->bond_nb.notifier_call = NULL; hr_dev->bond_nb.notifier_call = NULL;
} }
......
...@@ -826,13 +826,19 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, ...@@ -826,13 +826,19 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev,
bool bond_cleanup) bool bond_cleanup)
{ {
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_bond_group *bond_grp; struct hns_roce_bond_group *bond_grp;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
unregister_netdevice_notifier(&hr_dev->bond_nb); unregister_netdevice_notifier(&hr_dev->bond_nb);
bond_grp = hns_roce_get_bond_grp(hr_dev); bond_grp = hns_roce_get_bond_grp(hr_dev);
if (bond_grp && bond_cleanup) if (bond_grp) {
hns_roce_cleanup_bond(bond_grp); if (bond_cleanup)
hns_roce_cleanup_bond(bond_grp);
else if (priv->handle->rinfo.reset_state ==
HNS_ROCE_STATE_RST_UNINIT)
bond_grp->main_hr_dev = NULL;
}
} }
hr_dev->active = false; hr_dev->active = false;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册