未验证 提交 866b2e97 编写于 作者: O openeuler-ci-bot 提交者: Gitee

!219 【OLK-5.10】RDMA/hns: Support for bonding

Merge Pull Request from: @hellotcc 
 
This PR adds support for bonding to the hns RoCE driver.
**ISSUE**
https://gitee.com/openeuler/kernel/issues/I5Z6L8
**TEST**
related test log has been attached to the issue 
 
Link:https://gitee.com/openeuler/kernel/pulls/219 
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Reviewed-by: Yue Haibing <yuehaibing@huawei.com> 
Reviewed-by: Ling Mingqiang <lingmingqiang@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
...@@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 ...@@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
hns_roce_bond.o
ifdef CONFIG_INFINIBAND_HNS_HIP08 ifdef CONFIG_INFINIBAND_HNS_HIP08
hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs)
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2022 Hisilicon Limited.
*/
#include <linux/pci.h>
#include "hnae3.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
#include "hns_roce_bond.h"
static DEFINE_MUTEX(roce_bond_mutex);
/*
 * Look up the hns RoCE device that is bound to @net_dev.
 *
 * Returns the hns_roce_dev embedding the matching ib_device, or NULL when
 * ib_device_get_by_netdev() finds no RDMA_DRIVER_HNS device for @net_dev.
 * The ib_device reference obtained by the lookup is dropped before the
 * pointer is returned, so this helper does not pin the returned device.
 */
static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev)
{
	struct ib_device *ibdev = ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS);

	if (ibdev) {
		ib_device_put(ibdev);
		return container_of(ibdev, struct hns_roce_dev, ib_dev);
	}

	return NULL;
}
/*
 * Report whether the bond that @hr_dev's netdev belongs to is already bonded
 * at the RoCE level.
 *
 * Returns false when netdevs[0] of @hr_dev is not a LAG port. Otherwise every
 * netdev enslaved to the same master is walked under RCU, and true is
 * returned as soon as one of them maps to an hns RoCE device whose bond group
 * is in the HNS_ROCE_BOND_IS_BONDED state.
 */
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
{
	struct net_device *own_ndev = hr_dev->iboe.netdevs[0];
	struct hns_roce_dev *slave_hr_dev;
	struct net_device *upper_dev;
	struct net_device *net_dev;
	bool active = false;

	if (!netif_is_lag_port(own_ndev))
		return false;

	rcu_read_lock();
	upper_dev = netdev_master_upper_dev_get_rcu(own_ndev);
	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
		slave_hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
		if (!slave_hr_dev || !slave_hr_dev->bond_grp)
			continue;

		if (slave_hr_dev->bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED) {
			active = true;
			break;
		}
	}
	rcu_read_unlock();

	return active;
}
struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev)
{
struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp;
struct net_device *net_dev = NULL;
int i;
if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND))
return NULL;
if (!netif_is_lag_port(hr_dev->iboe.netdevs[0]))
return NULL;
if (!bond_grp)
return NULL;
mutex_lock(&bond_grp->bond_mutex);
if (bond_grp->bond_state != HNS_ROCE_BOND_IS_BONDED)
goto out;
if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev;
if (net_dev &&
bond_grp->bond_func_info[i].state.tx_enabled)
break;
}
} else {
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev;
if (net_dev && get_port_state(net_dev) == IB_PORT_ACTIVE)
break;
}
}
out:
mutex_unlock(&bond_grp->bond_mutex);
return net_dev;
}
/*
 * Queue @hr_dev's bond work item via schedule_delayed_work() so that it runs
 * after @delay has elapsed (callers in this file pass HZ).
 */
static void hns_roce_queue_bond_work(struct hns_roce_dev *hr_dev,
				     unsigned long delay)
{
	struct delayed_work *bond_work = &hr_dev->bond_work;

	schedule_delayed_work(bond_work, delay);
}
static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
{
struct net_device *net_dev;
u32 active_slave_map = 0;
u8 active_slave_num = 0;
bool active;
u8 i;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev;
if (net_dev) {
active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
bond_grp->bond_func_info[i].state.tx_enabled :
bond_grp->bond_func_info[i].state.link_up;
if (active) {
active_slave_num++;
active_slave_map |= (1 << i);
}
}
}
bond_grp->active_slave_num = active_slave_num;
bond_grp->active_slave_map = active_slave_map;
}
/*
 * Bring up the RoCE instance of the slave stored in slot @func_idx.
 *
 * Returns the value of handle->priv after a successful
 * hns_roce_hw_v2_init_instance(), or NULL if that call reports an error.
 */
static struct hns_roce_dev
*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
			   int func_idx)
{
	struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;

	if (hns_roce_hw_v2_init_instance(handle))
		return NULL;

	return handle->priv;
}
static void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
int func_idx)
{
struct hnae3_handle *handle;
handle = bond_grp->bond_func_info[func_idx].handle;
hns_roce_hw_v2_uninit_instance(handle, 0);
}
/*
 * Move @bond_grp from "not bonded" to "bonded".
 *
 * Sequence:
 *  1. refresh the active-slave bookkeeping;
 *  2. uninit the RoCE instance of every recorded slave except the main one;
 *  3. mark the group HNS_ROCE_BOND_IS_BONDED and re-init the first non-main
 *     slave that comes up successfully; that device becomes the new main
 *     device and takes over the group pointer;
 *  4. uninit the previous main device and send HNS_ROCE_SET_BOND through the
 *     new main device.
 *
 * The bond id is taken from the character at ROCE_BOND_NAME_ID_IDX of the new
 * device's ib_dev name, i.e. a single decimal digit.
 *
 * If no non-main slave could be re-initialised the function logs an error and
 * returns without touching the current main device.
 */
static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
{
	u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
	struct net_device *main_net_dev = bond_grp->main_net_dev;
	/* Must start as NULL: the loop below may never assign it. */
	struct hns_roce_dev *hr_dev = NULL;
	struct net_device *net_dev;
	int ret;
	int i;

	hns_roce_bond_get_active_slave(bond_grp);

	/* bond_grp will be kfree during uninit_instance of main_hr_dev.
	 * Thus the main_hr_dev is switched before the uninit_instance
	 * of the previous main_hr_dev.
	 */
	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		net_dev = bond_grp->bond_func_info[i].net_dev;
		if (net_dev && net_dev != main_net_dev)
			hns_roce_bond_uninit_client(bond_grp, i);
	}

	bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		net_dev = bond_grp->bond_func_info[i].net_dev;
		if (net_dev && net_dev != main_net_dev) {
			hr_dev = hns_roce_bond_init_client(bond_grp, i);
			if (hr_dev) {
				bond_grp->bond_id =
					hr_dev->ib_dev.name[ROCE_BOND_NAME_ID_IDX]
						- '0';
				/* Detach the old main so its uninit cannot free the group. */
				bond_grp->main_hr_dev->bond_grp = NULL;
				bond_grp->main_hr_dev = hr_dev;
				bond_grp->main_net_dev = net_dev;
				hr_dev->bond_grp = bond_grp;
				break;
			}
		}
	}

	if (!hr_dev) {
		/* main_hr_dev was not switched, so it is still valid for logging. */
		ibdev_err(&bond_grp->main_hr_dev->ib_dev,
			  "failed to init new main device for RoCE bond!\n");
		return;
	}

	hns_roce_bond_uninit_client(bond_grp, main_func_idx);

	ret = hns_roce_cmd_bond(hr_dev, HNS_ROCE_SET_BOND);
	if (ret) {
		ibdev_err(&hr_dev->ib_dev, "failed to set RoCE bond!\n");
		return;
	}

	ibdev_info(&hr_dev->ib_dev, "RoCE set bond finished!\n");
}
/*
 * Dissolve the RoCE bond described by @bond_grp.
 *
 * The group is marked HNS_ROCE_BOND_NOT_BONDED, every non-main slave gets its
 * own RoCE instance back, HNS_ROCE_CLEAR_BOND is sent through the main
 * device, and finally the main device itself is re-created by an
 * uninit/init cycle of its hnae3 handle.
 *
 * After the uninit of the main device neither @bond_grp nor the old main
 * hns_roce_dev may be touched (see the comment below), so a failure of the
 * following re-init is reported against the handle's PCI device. The previous
 * code logged through an hns_roce_dev pointer that had not been assigned yet.
 */
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
{
	u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
	struct net_device *main_net_dev = bond_grp->main_net_dev;
	struct hnae3_handle *handle;
	struct hns_roce_dev *hr_dev;
	struct net_device *net_dev;
	int ret;
	int i;

	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		net_dev = bond_grp->bond_func_info[i].net_dev;
		if (net_dev && net_dev != main_net_dev)
			hns_roce_bond_init_client(bond_grp, i);
	}

	ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CLEAR_BOND);
	if (ret)
		return;

	/* Fetch the handle first: bond_grp is gone after the uninit below. */
	handle = bond_grp->bond_func_info[main_func_idx].handle;

	/* bond_grp will be freed in uninit_instance(main_net_dev) */
	hns_roce_bond_uninit_client(bond_grp, main_func_idx);

	ret = hns_roce_hw_v2_init_instance(handle);
	if (ret) {
		/* No valid hns_roce_dev exists here; log via the PCI device. */
		dev_err(&handle->pdev->dev, "failed to clear RoCE bond!\n");
		return;
	}

	hr_dev = handle->priv;
	ibdev_info(&hr_dev->ib_dev, "RoCE clear bond finished!\n");
}
static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
{
int ret;
hns_roce_bond_get_active_slave(bond_grp);
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CHANGE_BOND);
if (ret) {
ibdev_err(&bond_grp->main_hr_dev->ib_dev,
"failed to change RoCE bond slave state!\n");
return;
}
ibdev_info(&bond_grp->main_hr_dev->ib_dev,
"RoCE slave changestate finished!\n");
}
static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp)
{
u32 inc_slave_map = bond_grp->slave_map_diff;
u8 inc_func_idx = 0;
int ret;
hns_roce_bond_get_active_slave(bond_grp);
while (inc_slave_map > 0) {
if (inc_slave_map & 1)
hns_roce_bond_uninit_client(bond_grp, inc_func_idx);
inc_slave_map >>= 1;
inc_func_idx++;
}
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CHANGE_BOND);
if (ret) {
ibdev_err(&bond_grp->main_hr_dev->ib_dev,
"failed to increase RoCE bond slave!\n");
return;
}
ibdev_info(&bond_grp->main_hr_dev->ib_dev,
"RoCE slave increase finished!\n");
}
static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp)
{
u32 dec_slave_map = bond_grp->slave_map_diff;
struct hns_roce_dev *hr_dev;
struct net_device *net_dev;
u8 main_func_idx = 0;
u8 dec_func_idx = 0;
int ret;
int i;
hns_roce_bond_get_active_slave(bond_grp);
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
if (dec_slave_map & (1 << main_func_idx)) {
hns_roce_cmd_bond(hr_dev, HNS_ROCE_CLEAR_BOND);
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev;
if (!(dec_slave_map & (1 << i)) && net_dev) {
hr_dev = hns_roce_bond_init_client(bond_grp, i);
if (hr_dev) {
bond_grp->main_hr_dev = hr_dev;
bond_grp->main_net_dev = net_dev;
hr_dev->bond_grp = bond_grp;
break;
}
}
}
hns_roce_bond_uninit_client(bond_grp, main_func_idx);
}
while (dec_slave_map > 0) {
if (dec_slave_map & 1) {
hns_roce_bond_init_client(bond_grp, dec_func_idx);
bond_grp->bond_func_info[dec_func_idx].net_dev = NULL;
}
dec_slave_map >>= 1;
dec_func_idx++;
}
if (bond_grp->slave_map_diff & (1 << main_func_idx))
ret = hns_roce_cmd_bond(hr_dev, HNS_ROCE_SET_BOND);
else
ret = hns_roce_cmd_bond(bond_grp->main_hr_dev,
HNS_ROCE_CHANGE_BOND);
if (ret) {
ibdev_err(&bond_grp->main_hr_dev->ib_dev,
"failed to decrease RoCE bond slave!\n");
return;
}
ibdev_info(&bond_grp->main_hr_dev->ib_dev,
"RoCE slave decrease finished!\n");
}
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp)
{
enum hns_roce_bond_state bond_state;
bool bond_ready;
bond_ready = bond_grp->bond_ready;
bond_state = bond_grp->bond_state;
ibdev_info(&bond_grp->main_hr_dev->ib_dev,
"do_bond: bond_ready - %d, bond_state - %d.\n",
bond_ready, bond_grp->bond_state);
if (bond_ready && bond_state == HNS_ROCE_BOND_NOT_BONDED)
hns_roce_set_bond(bond_grp);
else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE)
hns_roce_slave_changestate(bond_grp);
else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_INC)
hns_roce_slave_inc(bond_grp);
else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_DEC)
hns_roce_slave_dec(bond_grp);
else if (!bond_ready && bond_state != HNS_ROCE_BOND_NOT_BONDED)
hns_roce_clear_bond(bond_grp);
}
/*
 * Delayed-work handler that carries out the pending bond operation of the
 * hns_roce_dev embedding @work.
 *
 * Bond operations are serialised by roce_bond_mutex. If the mutex is busy the
 * work re-queues itself with a delay of HZ instead of blocking.
 *
 * The device's bond group pointer is cleared by hns_roce_set_bond() (for the
 * previous main device) and by hns_roce_cleanup_bond(), so it is checked
 * before being handed to hns_roce_do_bond(), which dereferences it
 * unconditionally.
 */
void hns_roce_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct hns_roce_dev *hr_dev;

	hr_dev = container_of(delayed_work, struct hns_roce_dev, bond_work);

	if (!mutex_trylock(&roce_bond_mutex)) {
		/* delay 1 sec */
		hns_roce_queue_bond_work(hr_dev, HZ);
		return;
	}

	if (hr_dev->bond_grp)
		hns_roce_do_bond(hr_dev->bond_grp);

	mutex_unlock(&roce_bond_mutex);
}
int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
{
int ret;
INIT_DELAYED_WORK(&hr_dev->bond_work, hns_roce_do_bond_work);
hr_dev->bond_nb.notifier_call = hns_roce_bond_event;
ret = register_netdevice_notifier(&hr_dev->bond_nb);
if (ret) {
ibdev_err(&hr_dev->ib_dev,
"failed to register notifier for RoCE bond!\n");
hr_dev->bond_nb.notifier_call = NULL;
}
return ret;
}
void hns_roce_cleanup_bond(struct hns_roce_dev *hr_dev)
{
unregister_netdevice_notifier(&hr_dev->bond_nb);
cancel_delayed_work(&hr_dev->bond_work);
if (hr_dev->bond_grp && hr_dev == hr_dev->bond_grp->main_hr_dev)
kfree(hr_dev->bond_grp);
hr_dev->bond_grp = NULL;
}
static bool hns_roce_bond_lowerstate_event(struct hns_roce_dev *hr_dev,
struct netdev_notifier_changelowerstate_info *info)
{
struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp;
struct netdev_lag_lower_state_info *bond_lower_info;
struct net_device *net_dev;
int i;
net_dev = netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
if (!netif_is_lag_port(net_dev))
return false;
bond_lower_info = info->lower_state_info;
if (!bond_lower_info)
return false;
if (!bond_grp) {
hr_dev->slave_state = *bond_lower_info;
return false;
}
mutex_lock(&bond_grp->bond_mutex);
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
if (net_dev == bond_grp->bond_func_info[i].net_dev) {
bond_grp->bond_func_info[i].state = *bond_lower_info;
break;
}
}
if (bond_grp->bond_ready &&
bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED)
bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;
mutex_unlock(&bond_grp->bond_mutex);
return true;
}
/*
 * Only the active-backup and hash LAG tx types are supported for RoCE
 * bonding; returns true for those two and false for everything else.
 */
static inline bool hns_roce_bond_mode_is_supported(enum netdev_lag_tx_type tx_type)
{
	return tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
	       tx_type == NETDEV_LAG_TX_TYPE_HASH;
}
/*
 * Rebuild the slave count and slave bitmap of @bond_grp from the netdevs that
 * are currently enslaved to @upper_dev.
 *
 * Each slave that maps to an hns RoCE device is indexed by the PCI function
 * number of that device. A slot that is still empty is filled with the
 * slave's netdev, its hnae3 handle and the lower state cached in the device.
 * Slots that are already occupied are left untouched.
 *
 * PCI_FUNC() can yield values up to 7 while bond_func_info[] only has
 * ROCE_BOND_FUNC_MAX entries, so functions outside that range are skipped
 * rather than written past the end of the array.
 * NOTE(review): confirm whether such functions can occur on real hardware and
 * whether skipping them silently is the desired policy.
 */
static void hns_roce_bond_info_record(struct hns_roce_bond_group *bond_grp,
				      struct net_device *upper_dev)
{
	struct hns_roce_v2_priv *priv;
	struct hns_roce_dev *hr_dev;
	struct net_device *net_dev;
	u8 func_idx;

	bond_grp->slave_num = 0;
	bond_grp->slave_map = 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
		hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
		if (!hr_dev)
			continue;

		func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
		if (func_idx >= ROCE_BOND_FUNC_MAX)
			continue;

		bond_grp->slave_map |= (1 << func_idx);
		bond_grp->slave_num++;

		if (!bond_grp->bond_func_info[func_idx].net_dev) {
			priv = hr_dev->priv;

			bond_grp->bond_func_info[func_idx].net_dev = net_dev;
			bond_grp->bond_func_info[func_idx].handle =
				priv->handle;
			bond_grp->bond_func_info[func_idx].state =
				hr_dev->slave_state;
		}
	}
	rcu_read_unlock();
}
static bool hns_roce_bond_upper_event(struct hns_roce_dev *hr_dev,
struct netdev_notifier_changeupper_info *info)
{
struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp;
struct net_device *upper_dev = info->upper_dev;
struct netdev_lag_upper_info *bond_upper_info;
u32 pre_slave_map = bond_grp->slave_map;
u8 pre_slave_num = bond_grp->slave_num;
bool changed = false;
if (!upper_dev || !netif_is_lag_master(upper_dev))
return false;
if (info->linking)
bond_upper_info = info->upper_info;
mutex_lock(&bond_grp->bond_mutex);
if (bond_upper_info)
bond_grp->tx_type = bond_upper_info->tx_type;
hns_roce_bond_info_record(bond_grp, upper_dev);
bond_grp->bond = netdev_priv(upper_dev);
if (!hns_roce_bond_mode_is_supported(bond_grp->tx_type) ||
bond_grp->slave_num <= 1) {
changed = bond_grp->bond_ready;
bond_grp->bond_ready = false;
goto out;
}
if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) {
bond_grp->bond_ready = true;
changed = true;
} else if (bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED &&
bond_grp->slave_num != pre_slave_num) {
bond_grp->bond_state = bond_grp->slave_num > pre_slave_num ?
HNS_ROCE_BOND_SLAVE_INC :
HNS_ROCE_BOND_SLAVE_DEC;
bond_grp->slave_map_diff = pre_slave_map ^ bond_grp->slave_map;
bond_grp->bond_ready = true;
changed = true;
}
out:
mutex_unlock(&bond_grp->bond_mutex);
return changed;
}
/*
 * Allocate and initialise a bond group with @main_hr_dev as main device.
 *
 * The group starts out not ready and not bonded, remembers @upper_dev and
 * netdevs[0] of @main_hr_dev, and has its slave table filled from the current
 * slaves of @upper_dev.
 *
 * Returns the new group, or NULL if the allocation fails.
 */
static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev *main_hr_dev,
							    struct net_device *upper_dev)
{
	struct hns_roce_bond_group *grp = kzalloc(sizeof(*grp), GFP_KERNEL);

	if (!grp)
		return NULL;

	mutex_init(&grp->bond_mutex);

	grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
	grp->bond_ready = false;
	grp->main_hr_dev = main_hr_dev;
	grp->main_net_dev = main_hr_dev->iboe.netdevs[0];
	grp->upper_dev = upper_dev;

	hns_roce_bond_info_record(grp, upper_dev);

	return grp;
}
/*
 * Returns true when @bond is the master upper device of @net_dev. The master
 * is looked up under RCU and only compared by address afterwards.
 */
static bool hns_roce_is_slave(struct net_device *bond,
			      struct net_device *net_dev)
{
	struct net_device *master;

	rcu_read_lock();
	master = netdev_master_upper_dev_get_rcu(net_dev);
	rcu_read_unlock();

	return master == bond;
}
/*
 * Returns true when at least one netdev enslaved to @upper_dev maps to an hns
 * RoCE device that already owns a bond group. The slaves are walked under
 * RCU.
 */
static bool hns_roce_is_bond_grp_exist(struct net_device *upper_dev)
{
	struct hns_roce_dev *slave_hr_dev;
	struct net_device *net_dev;
	bool found = false;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
		slave_hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
		if (slave_hr_dev && slave_hr_dev->bond_grp) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
/*
 * Netdevice notifier callback of the RoCE bonding support.
 *
 * Only NETDEV_CHANGEUPPER and NETDEV_CHANGELOWERSTATE are handled. When the
 * event netdev has a master, the event is ignored unless netdevs[0] of this
 * device is enslaved to the same master; a bond group is then allocated for
 * this device under roce_bond_mutex unless another slave of that master
 * already owns one. The event is finally passed to the upper/lower-state
 * handler, and the bond work is queued with a delay of HZ when that handler
 * reports a change.
 *
 * Always returns NOTIFY_DONE.
 */
int hns_roce_bond_event(struct notifier_block *self,
			unsigned long event, void *ptr)
{
	struct hns_roce_dev *hr_dev =
		container_of(self, struct hns_roce_dev, bond_nb);
	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
	struct net_device *upper_dev;
	bool changed;

	switch (event) {
	case NETDEV_CHANGEUPPER:
	case NETDEV_CHANGELOWERSTATE:
		break;
	default:
		return NOTIFY_DONE;
	}

	rcu_read_lock();
	upper_dev = netdev_master_upper_dev_get_rcu(net_dev);
	rcu_read_unlock();

	if (!upper_dev) {
		/* Lower-state events without a master only matter for our own netdev. */
		if (event == NETDEV_CHANGELOWERSTATE &&
		    hr_dev != hns_roce_get_hrdev_by_netdev(net_dev))
			return NOTIFY_DONE;
	} else {
		if (!hns_roce_is_slave(upper_dev, hr_dev->iboe.netdevs[0]))
			return NOTIFY_DONE;

		mutex_lock(&roce_bond_mutex);
		if (!hr_dev->bond_grp) {
			/* One group per bond: another slave may own it already. */
			if (hns_roce_is_bond_grp_exist(upper_dev))
				goto out_unlock;

			hr_dev->bond_grp = hns_roce_alloc_bond_grp(hr_dev,
								   upper_dev);
			if (!hr_dev->bond_grp) {
				ibdev_err(&hr_dev->ib_dev,
					  "failed to alloc RoCE bond_grp!\n");
				goto out_unlock;
			}
		}
		mutex_unlock(&roce_bond_mutex);
	}

	if (event == NETDEV_CHANGEUPPER)
		changed = hns_roce_bond_upper_event(hr_dev, ptr);
	else
		changed = hns_roce_bond_lowerstate_event(hr_dev, ptr);

	if (changed)
		hns_roce_queue_bond_work(hr_dev, HZ);

	return NOTIFY_DONE;

out_unlock:
	mutex_unlock(&roce_bond_mutex);
	return NOTIFY_DONE;
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2022 Hisilicon Limited.
*/
#ifndef _HNS_ROCE_BOND_H
#define _HNS_ROCE_BOND_H
#include <linux/netdevice.h>
#include <net/bonding.h>
#define ROCE_BOND_FUNC_MAX 4
#define ROCE_BOND_NAME_ID_IDX 9
/*
 * Bond mode values written to the bond_mode field of the bond command.
 * hns_roce_cmd_bond() sends BOND_MODE_1 for the active-backup tx type and
 * BOND_MODE_2_4 for every other tx type.
 */
enum {
BOND_MODE_1,
BOND_MODE_2_4,
};
/*
 * State of a RoCE bond group; together with bond_ready it selects the
 * operation performed by the bond work.
 */
enum hns_roce_bond_state {
/* Initial state of a new group; also set when the bond is cleared. */
HNS_ROCE_BOND_NOT_BONDED,
/* Set once a set/change/inc/dec operation has been started. */
HNS_ROCE_BOND_IS_BONDED,
/* Upper event saw more slaves than before; slave_map_diff holds them. */
HNS_ROCE_BOND_SLAVE_INC,
/* Upper event saw fewer slaves than before; slave_map_diff holds them. */
HNS_ROCE_BOND_SLAVE_DEC,
/* Lower-state event arrived while the group was ready and bonded. */
HNS_ROCE_BOND_SLAVE_CHANGESTATE,
};
/*
 * Kind of bond command passed to hns_roce_cmd_bond(); get_bond_opcode() maps
 * each value to the command queue opcode noted below.
 */
enum hns_roce_bond_cmd_type {
/* -> HNS_ROCE_OPC_SET_BOND_INFO */
HNS_ROCE_SET_BOND,
/* -> HNS_ROCE_OPC_CHANGE_ACTIVE_PORT */
HNS_ROCE_CHANGE_BOND,
/* -> HNS_ROCE_OPC_CLEAR_BOND_INFO; only the bond id is filled in */
HNS_ROCE_CLEAR_BOND,
};
/*
 * Per-slave record of a bond group. The slot index in bond_func_info[] is the
 * PCI function number of the slave's RoCE device.
 */
struct hns_roce_func_info {
/* Slave netdev; NULL marks an unused slot. */
struct net_device *net_dev;
/* hnae3 handle used to init/uninit the slave's RoCE instance. */
struct hnae3_handle *handle;
/* Last LAG lower state recorded for this slave (link_up, tx_enabled). */
struct netdev_lag_lower_state_info state;
};
/*
 * Bookkeeping for one RoCE bond. The group is owned by its main device and is
 * freed when that device is cleaned up.
 */
struct hns_roce_bond_group {
/* LAG master the group was created for. */
struct net_device *upper_dev;
/* Netdev of the current main device. */
struct net_device *main_net_dev;
/* RoCE device that currently owns the group and issues bond commands. */
struct hns_roce_dev *main_hr_dev;
/* Number of slaves found by the last membership scan. */
u8 slave_num;
/* Number of slaves counted as active by the last refresh. */
u8 active_slave_num;
/* Bit N set: the slave with PCI function N is part of the bond. */
u32 slave_map;
/* Bit N set: the slave with PCI function N is active. */
u32 active_slave_map;
/* XOR of the previous and current slave_map (added/removed slaves). */
u32 slave_map_diff;
/* Id sent to hardware; derived from one digit of the ib device name. */
u8 bond_id;
/* Private data of the LAG master; read for the xmit hash policy. */
struct bonding *bond;
/* True while the bond has a supported tx type and more than one slave. */
bool bond_ready;
enum hns_roce_bond_state bond_state;
/* LAG tx type taken from the upper info of the last link event. */
enum netdev_lag_tx_type tx_type;
/*
* Mutex taken around updates of the fields above in the netdev event
* handlers and around reads in hns_roce_get_bond_netdev().
*/
struct mutex bond_mutex;
/* Slave table, indexed by PCI function number. */
struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
};
int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
int hns_roce_bond_event(struct notifier_block *self,
unsigned long event, void *ptr);
void hns_roce_cleanup_bond(struct hns_roce_dev *hr_dev);
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev);
#endif
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h> #include <rdma/ib_verbs.h>
#include <rdma/hns-abi.h> #include <rdma/hns-abi.h>
#include "hns_roce_bond.h"
#define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP08 0x21
#define PCI_REVISION_ID_HIP09 0x30 #define PCI_REVISION_ID_HIP09 0x30
...@@ -147,6 +148,7 @@ enum { ...@@ -147,6 +148,7 @@ enum {
HNS_ROCE_CAP_FLAG_STASH = BIT(17), HNS_ROCE_CAP_FLAG_STASH = BIT(17),
HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(20), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(20),
HNS_ROCE_CAP_FLAG_BOND = BIT(21),
}; };
#define HNS_ROCE_DB_TYPE_COUNT 2 #define HNS_ROCE_DB_TYPE_COUNT 2
...@@ -898,6 +900,9 @@ struct hns_roce_hw { ...@@ -898,6 +900,9 @@ struct hns_roce_hw {
u8 *tc_mode, u8 *priority); u8 *tc_mode, u8 *priority);
const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops; const struct ib_device_ops *hns_roce_dev_srq_ops;
int (*bond_init)(struct hns_roce_dev *hr_dev);
bool (*bond_is_active)(struct hns_roce_dev *hr_dev);
struct net_device *(*get_bond_netdev)(struct hns_roce_dev *hr_dev);
}; };
struct hns_roce_dev { struct hns_roce_dev {
...@@ -961,6 +966,11 @@ struct hns_roce_dev { ...@@ -961,6 +966,11 @@ struct hns_roce_dev {
u32 is_vf; u32 is_vf;
u32 cong_algo_tmpl_id; u32 cong_algo_tmpl_id;
u64 dwqe_page; u64 dwqe_page;
struct notifier_block bond_nb;
struct delayed_work bond_work;
struct hns_roce_bond_group *bond_grp;
struct netdev_lag_lower_state_info slave_state;
}; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
......
...@@ -1350,6 +1350,61 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ...@@ -1350,6 +1350,61 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
return ret; return ret;
} }
/*
 * Translate a bond command type into its command queue opcode. Any value
 * other than SET/CHANGE maps to the clear opcode.
 */
static inline enum hns_roce_opcode_type
get_bond_opcode(enum hns_roce_bond_cmd_type bond_type)
{
	switch (bond_type) {
	case HNS_ROCE_SET_BOND:
		return HNS_ROCE_OPC_SET_BOND_INFO;
	case HNS_ROCE_CHANGE_BOND:
		return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT;
	default:
		return HNS_ROCE_OPC_CLEAR_BOND_INFO;
	}
}
int hns_roce_cmd_bond(struct hns_roce_dev *hr_dev,
enum hns_roce_bond_cmd_type bond_type)
{
enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type);
struct hns_roce_bond_info *slave_info;
struct hns_roce_cmq_desc desc = { 0 };
int ret;
slave_info = (struct hns_roce_bond_info *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, opcode, false);
slave_info->bond_id = cpu_to_le32(hr_dev->bond_grp->bond_id);
if (bond_type == HNS_ROCE_CLEAR_BOND)
goto out;
if (hr_dev->bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
slave_info->bond_mode = cpu_to_le32(BOND_MODE_1);
if (hr_dev->bond_grp->active_slave_num != 1)
ibdev_err(&hr_dev->ib_dev,
"active slave cnt(%d) in Mode 1 is invalid.\n",
hr_dev->bond_grp->active_slave_num);
} else {
slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4);
slave_info->hash_policy =
cpu_to_le32(hr_dev->bond_grp->bond->params.xmit_policy);
}
slave_info->active_slave_cnt =
cpu_to_le32(hr_dev->bond_grp->active_slave_num);
slave_info->active_slave_mask =
cpu_to_le32(hr_dev->bond_grp->active_slave_map);
slave_info->slave_mask =
cpu_to_le32(hr_dev->bond_grp->slave_map);
out:
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"cmq bond type(%d) failed, ret = %d.\n",
bond_type, ret);
return ret;
}
static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
dma_addr_t base_addr, u8 cmd, unsigned long tag) dma_addr_t base_addr, u8 cmd, unsigned long tag)
{ {
...@@ -6781,6 +6836,9 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { ...@@ -6781,6 +6836,9 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.get_dscp = hns_roce_hw_v2_get_dscp, .get_dscp = hns_roce_hw_v2_get_dscp,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
.bond_init = hns_roce_bond_init,
.bond_is_active = hns_roce_bond_is_active,
.get_bond_netdev = hns_roce_get_bond_netdev,
}; };
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
...@@ -6903,7 +6961,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, ...@@ -6903,7 +6961,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
ib_dealloc_device(&hr_dev->ib_dev); ib_dealloc_device(&hr_dev->ib_dev);
} }
static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
{ {
const struct hnae3_ae_ops *ops = handle->ae_algo->ops; const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
const struct pci_device_id *id; const struct pci_device_id *id;
...@@ -6946,8 +7004,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) ...@@ -6946,8 +7004,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
return -EBUSY; return -EBUSY;
} }
static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset)
bool reset)
{ {
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
return; return;
......
...@@ -252,6 +252,9 @@ enum hns_roce_opcode_type { ...@@ -252,6 +252,9 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_EXT_CFG = 0x8512, HNS_ROCE_OPC_EXT_CFG = 0x8512,
HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033, HNS_SWITCH_PARAMETER_CFG = 0x1033,
HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602,
HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603,
}; };
enum { enum {
...@@ -1464,11 +1467,25 @@ struct hns_roce_sccc_clr_done { ...@@ -1464,11 +1467,25 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5]; __le32 rsv[5];
}; };
struct hns_roce_bond_info {
__le32 bond_id;
__le32 bond_mode;
__le32 active_slave_cnt;
__le32 active_slave_mask;
__le32 slave_mask;
__le32 hash_policy;
};
int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle);
void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset);
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp, struct hns_roce_qp *hr_qp,
struct ib_udata *udata); struct ib_udata *udata);
int hns_roce_cmd_bond(struct hns_roce_dev *hr_dev,
enum hns_roce_bond_cmd_type bond_type);
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest) void __iomem *dest)
......
...@@ -37,9 +37,12 @@ ...@@ -37,9 +37,12 @@
#include <rdma/ib_smi.h> #include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h> #include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h> #include <rdma/ib_cache.h>
#include "hnae3.h"
#include "hns_roce_common.h" #include "hns_roce_common.h"
#include "hns_roce_device.h" #include "hns_roce_device.h"
#include "hns_roce_hem.h" #include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
const u8 *addr) const u8 *addr)
...@@ -259,7 +262,9 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, ...@@ -259,7 +262,9 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
spin_lock_irqsave(&hr_dev->iboe.lock, flags); spin_lock_irqsave(&hr_dev->iboe.lock, flags);
net_dev = hr_dev->iboe.netdevs[port]; net_dev = hr_dev->hw->get_bond_netdev(hr_dev);
if (!net_dev)
net_dev = hr_dev->iboe.netdevs[port];
if (!net_dev) { if (!net_dev) {
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
dev_err(dev, "Find netdev %u failed!\n", port); dev_err(dev, "Find netdev %u failed!\n", port);
...@@ -534,6 +539,9 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ...@@ -534,6 +539,9 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{ {
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)
hns_roce_cleanup_bond(hr_dev);
hr_dev->active = false; hr_dev->active = false;
unregister_netdevice_notifier(&iboe->nb); unregister_netdevice_notifier(&iboe->nb);
ib_unregister_device(&hr_dev->ib_dev); ib_unregister_device(&hr_dev->ib_dev);
...@@ -711,7 +719,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ...@@ -711,7 +719,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
return ret; return ret;
} }
dma_set_max_seg_size(dev, UINT_MAX); dma_set_max_seg_size(dev, UINT_MAX);
ret = ib_register_device(ib_dev, "hns_%d", dev);
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) &&
(hr_dev->hw->bond_is_active(hr_dev)))
ret = ib_register_device(ib_dev, "hns_bond_%d", dev);
else
ret = ib_register_device(ib_dev, "hns_%d", dev);
if (ret) { if (ret) {
dev_err(dev, "ib_register_device failed!\n"); dev_err(dev, "ib_register_device failed!\n");
return ret; return ret;
...@@ -730,8 +743,15 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ...@@ -730,8 +743,15 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
goto error_failed_setup_mtu_mac; goto error_failed_setup_mtu_mac;
} }
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
ret = hr_dev->hw->bond_init(hr_dev);
if (ret)
dev_err(dev, "roce bond init failed, ret = %d\n", ret);
}
hr_dev->active = true; hr_dev->active = true;
return 0;
return ret;
error_failed_setup_mtu_mac: error_failed_setup_mtu_mac:
ib_unregister_device(ib_dev); ib_unregister_device(ib_dev);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册