提交 55fdbb01 编写于 作者: D David S. Miller

Merge branch 'mlx5-next'

Saeed Mahameed says:

====================
Mellanox 100G mlx5 seamless error recovery

This series from Mohamad improves the driver load/unload flows
to seamlessly handle pci errors and device internal errors recovery
reset flows.

Current pci and internal error handling is too heavy and is done
with a full restart of the driver by unregistering mlx5 interfaces
(mlx5e netedevs and mlx5_ib) which will cause losing all the current
interfaces and mlx5 core configurations.

To improve this, we add new callback functions of mlx5 interface
object (attach/detach) to be called upon reset flows when errors are
detected rather than calling register and unregister interfaces.

On their side, interfaces such as (mlx5e and mlx5_ib) can choose to implement
those callback, if not, the old heavy reset will be called for that interface.

For non-interface mlx5 modules such as sriov and eswitch, we refactored
and reorganized the code in a way that the software state objects are created
only once on driver load.  Those software state objects are kept upon reset recovery
flows and only freed once on driver unload. On seamless soft reset flows, only
hardware resources are released on stop and re-allocated on start according to the
current soft state.

In this series only mlx5e interface implements attach/detach callbacks
so that the netdevice will be kept alive on reset. On detach only hardware resources
are released and the netdevice will be marked as detached to the stack. Once
attached again it will re-allocate the hardware resources according to the current
netdevice state, and all the configurations and the software state will be kept or restored
after recovery.

Note: I will be out of office all next week, in case of any updates
or V2 is required, Tariq will post the new series, I hope it is ok.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o
fs_counters.o rl.o lag.o dev.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
......
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
static LIST_HEAD(intf_list);
static LIST_HEAD(mlx5_dev_list);
/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
struct mlx5_device_context {
struct list_head list;
struct mlx5_interface *intf;
void *context;
unsigned long state;
};
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
if (!mlx5_lag_intf_add(intf, priv))
return;
dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
if (!dev_ctx)
return;
dev_ctx->intf = intf;
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
if (intf->attach)
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irq(&priv->ctx_lock);
} else {
kfree(dev_ctx);
}
}
static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf == intf)
return dev_ctx;
return NULL;
}
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
intf->remove(dev, dev_ctx->context);
kfree(dev_ctx);
}
static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
return;
intf->attach(dev, dev_ctx->context);
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
return;
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_attach_interface(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
}
static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
if (intf->detach) {
if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
return;
intf->detach(dev, dev_ctx->context);
clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
return;
intf->remove(dev, dev_ctx->context);
clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
}
void mlx5_detach_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_detach_interface(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
}
bool mlx5_device_registered(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv;
bool found = false;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
if (priv == &dev->priv)
found = true;
mutex_unlock(&mlx5_intf_mutex);
return found;
}
int mlx5_register_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&priv->dev_list, &mlx5_dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_remove_device(intf, priv);
list_del(&priv->dev_list);
mutex_unlock(&mlx5_intf_mutex);
}
int mlx5_register_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
if (!intf->add || !intf->remove)
return -EINVAL;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&intf->list, &intf_list);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);
void mlx5_unregister_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_remove_device(intf, priv);
list_del(&intf->list);
mutex_unlock(&mlx5_intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
void *result = NULL;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
if ((dev_ctx->intf->protocol == protocol) &&
dev_ctx->intf->get_dev) {
result = dev_ctx->intf->get_dev(dev_ctx->context);
break;
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_add_device(intf, &dev->priv);
break;
}
}
/* Must be called with intf_mutex held */
void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_remove_device(intf, &dev->priv);
break;
}
}
static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
{
return (u16)((dev->pdev->bus->number << 8) |
PCI_SLOT(dev->pdev->devfn));
}
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
u16 pci_id = mlx5_gen_pci_id(dev);
struct mlx5_core_dev *res = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
res = tmp_dev;
break;
}
}
return res;
}
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
void mlx5_dev_list_lock(void)
{
mutex_lock(&mlx5_intf_mutex);
}
void mlx5_dev_list_unlock(void)
{
mutex_unlock(&mlx5_intf_mutex);
}
int mlx5_dev_list_trylock(void)
{
return mutex_trylock(&mlx5_intf_mutex);
}
......@@ -844,9 +844,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
void mlx5e_update_stats_work(struct work_struct *work);
void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile, void *ppriv);
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
void *ppriv);
void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
struct rtnl_link_stats64 *
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
......
......@@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
return 0;
}
static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
{
int i;
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
}
if (priv->fs.vlan.filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
}
static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
{
int i;
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
}
if (priv->fs.vlan.filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
}
#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
......@@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
if (err)
goto err_free_g;
err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
if (err)
goto err_destroy_vlan_flow_groups;
mlx5e_add_vlan_rules(priv);
return 0;
err_destroy_vlan_flow_groups:
mlx5e_destroy_groups(ft);
err_free_g:
kfree(ft->g);
err_destroy_vlan_table:
......@@ -1043,6 +1069,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
{
mlx5e_del_vlan_rules(priv);
mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
}
......@@ -1100,7 +1127,6 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
{
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
mlx5e_destroy_ttc_table(priv);
......
......@@ -1883,6 +1883,9 @@ int mlx5e_close(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
if (!netif_device_present(netdev))
return -ENODEV;
mutex_lock(&priv->state_lock);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
......@@ -3401,13 +3404,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.max_tc = MLX5E_MAX_NUM_TC,
};
void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile, void *ppriv)
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
void *ppriv)
{
int nch = profile->max_nch(mdev);
struct net_device *netdev;
struct mlx5e_priv *priv;
int nch = profile->max_nch(mdev);
int err;
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
......@@ -3425,12 +3428,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
goto err_free_netdev;
goto err_cleanup_nic;
return netdev;
err_cleanup_nic:
profile->cleanup(priv);
free_netdev(netdev);
return NULL;
}
int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
{
const struct mlx5e_profile *profile;
struct mlx5e_priv *priv;
int err;
priv = netdev_priv(netdev);
profile = priv->profile;
clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
err = mlx5e_create_umr_mkey(priv);
if (err) {
mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
goto err_destroy_wq;
goto out;
}
err = profile->init_tx(priv);
......@@ -3453,20 +3475,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
mlx5e_set_dev_port_mtu(netdev);
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
goto err_dealloc_q_counters;
}
if (profile->enable)
profile->enable(priv);
return priv;
rtnl_lock();
if (netif_running(netdev))
mlx5e_open(netdev);
netif_device_attach(netdev);
rtnl_unlock();
err_dealloc_q_counters:
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
return 0;
err_close_drop_rq:
mlx5e_close_drop_rq(priv);
......@@ -3477,13 +3495,8 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
err_destroy_umr_mkey:
mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
err_destroy_wq:
destroy_workqueue(priv->wq);
err_free_netdev:
free_netdev(netdev);
return NULL;
out:
return err;
}
static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
......@@ -3509,16 +3522,80 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
}
}
void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
const struct mlx5e_profile *profile = priv->profile;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (profile->disable)
profile->disable(priv);
flush_workqueue(priv->wq);
rtnl_lock();
if (netif_running(netdev))
mlx5e_close(netdev);
netif_device_detach(netdev);
rtnl_unlock();
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
mlx5e_close_drop_rq(priv);
profile->cleanup_tx(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
cancel_delayed_work_sync(&priv->update_stats_work);
}
/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
* hardware contexts and to connect it to the current netdev.
*/
static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
int err;
if (netif_device_present(netdev))
return 0;
err = mlx5e_create_mdev_resources(mdev);
if (err)
return err;
err = mlx5e_attach_netdev(mdev, netdev);
if (err) {
mlx5e_destroy_mdev_resources(mdev);
return err;
}
return 0;
}
static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
if (!netif_device_present(netdev))
return;
mlx5e_detach_netdev(mdev, netdev);
mlx5e_destroy_mdev_resources(mdev);
}
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
void *ppriv = NULL;
void *ret;
if (mlx5e_check_required_hca_cap(mdev))
return NULL;
void *priv;
int vport;
int err;
struct net_device *netdev;
if (mlx5e_create_mdev_resources(mdev))
err = mlx5e_check_required_hca_cap(mdev);
if (err)
return NULL;
mlx5e_register_vport_rep(mdev);
......@@ -3526,12 +3603,39 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
if (MLX5_CAP_GEN(mdev, vport_group_manager))
ppriv = &esw->offloads.vport_reps[0];
ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
if (!ret) {
mlx5e_destroy_mdev_resources(mdev);
return NULL;
netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
goto err_unregister_reps;
}
priv = netdev_priv(netdev);
err = mlx5e_attach(mdev, priv);
if (err) {
mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
goto err_destroy_netdev;
}
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
goto err_detach;
}
return ret;
return priv;
err_detach:
mlx5e_detach(mdev, priv);
err_destroy_netdev:
mlx5e_destroy_netdev(mdev, priv);
err_unregister_reps:
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport);
return NULL;
}
void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
......@@ -3539,30 +3643,11 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
const struct mlx5e_profile *profile = priv->profile;
struct net_device *netdev = priv->netdev;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (profile->disable)
profile->disable(priv);
flush_workqueue(priv->wq);
if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
netif_device_detach(netdev);
mlx5e_close(netdev);
} else {
unregister_netdev(netdev);
}
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
mlx5e_close_drop_rq(priv);
profile->cleanup_tx(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
cancel_delayed_work_sync(&priv->update_stats_work);
unregister_netdev(netdev);
destroy_workqueue(priv->wq);
if (profile->cleanup)
profile->cleanup(priv);
if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state))
free_netdev(netdev);
free_netdev(netdev);
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
......@@ -3572,12 +3657,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
struct mlx5e_priv *priv = vpriv;
int vport;
mlx5e_destroy_netdev(mdev, priv);
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport);
mlx5e_destroy_mdev_resources(mdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(mdev, priv);
}
static void *mlx5e_get_netdev(void *vpriv)
......@@ -3590,6 +3674,8 @@ static void *mlx5e_get_netdev(void *vpriv)
static struct mlx5_interface mlx5e_interface = {
.add = mlx5e_add,
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
.event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_netdev,
......
......@@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = {
int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!rep->priv_data) {
mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n",
rep->vport);
struct net_device *netdev;
int err;
netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
return -EINVAL;
}
rep->priv_data = netdev_priv(netdev);
err = mlx5e_attach_netdev(esw->dev, netdev);
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
goto err_destroy_netdev;
}
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
goto err_detach_netdev;
}
return 0;
err_detach_netdev:
mlx5e_detach_netdev(esw->dev, netdev);
err_destroy_netdev:
mlx5e_destroy_netdev(esw->dev, rep->priv_data);
return err;
}
void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
struct mlx5e_priv *priv = rep->priv_data;
struct net_device *netdev = priv->netdev;
mlx5e_detach_netdev(esw->dev, netdev);
mlx5e_destroy_netdev(esw->dev, priv);
}
......@@ -116,57 +116,6 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
}
/* E-Switch vport context HW commands */
static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
u32 *out, int outlen)
{
u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0};
MLX5_SET(query_nic_vport_context_in, in, opcode,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
if (vport)
MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
}
static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
u16 *vlan, u8 *qos)
{
u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0};
int err;
bool cvlan_strip;
bool cvlan_insert;
*vlan = 0;
*qos = 0;
if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
!MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
return -ENOTSUPP;
err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
if (err)
goto out;
cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
esw_vport_context.vport_cvlan_strip);
cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
esw_vport_context.vport_cvlan_insert);
if (cvlan_strip || cvlan_insert) {
*vlan = MLX5_GET(query_esw_vport_context_out, out,
esw_vport_context.cvlan_id);
*qos = MLX5_GET(query_esw_vport_context_out, out,
esw_vport_context.cvlan_pcp);
}
esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
vport, *vlan, *qos);
out:
return err;
}
static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
void *in, int inlen)
{
......@@ -921,7 +870,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
vport_num, promisc_all, promisc_mc);
if (!vport->trusted || !vport->enabled) {
if (!vport->info.trusted || !vport->enabled) {
promisc_uc = 0;
promisc_mc = 0;
promisc_all = 0;
......@@ -1257,30 +1206,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
struct mlx5_flow_spec *spec;
u8 smac[ETH_ALEN];
int err = 0;
u8 *smac_v;
if (vport->spoofchk) {
err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac);
if (err) {
esw_warn(esw->dev,
"vport[%d] configure ingress rules failed, query smac failed, err(%d)\n",
vport->vport, err);
return err;
}
if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
mlx5_core_warn(esw->dev,
"vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
vport->vport);
return -EPERM;
if (!is_valid_ether_addr(smac)) {
mlx5_core_warn(esw->dev,
"vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
vport->vport);
return -EPERM;
}
}
esw_vport_cleanup_ingress_rules(esw, vport);
if (!vport->vlan && !vport->qos && !vport->spoofchk) {
if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
esw_vport_disable_ingress_acl(esw, vport);
return 0;
}
......@@ -1289,7 +1228,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
esw_debug(esw->dev,
"vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
vport->vport, vport->vlan, vport->qos);
vport->vport, vport->info.vlan, vport->info.qos);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
......@@ -1299,16 +1238,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
goto out;
}
if (vport->vlan || vport->qos)
if (vport->info.vlan || vport->info.qos)
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
if (vport->spoofchk) {
if (vport->info.spoofchk) {
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
smac_v = MLX5_ADDR_OF(fte_match_param,
spec->match_value,
outer_headers.smac_47_16);
ether_addr_copy(smac_v, smac);
ether_addr_copy(smac_v, vport->info.mac);
}
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
......@@ -1354,7 +1293,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
esw_vport_cleanup_egress_rules(esw, vport);
if (!vport->vlan && !vport->qos) {
if (!vport->info.vlan && !vport->info.qos) {
esw_vport_disable_egress_acl(esw, vport);
return 0;
}
......@@ -1363,7 +1302,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
esw_debug(esw->dev,
"vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
vport->vport, vport->vlan, vport->qos);
vport->vport, vport->info.vlan, vport->info.qos);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
......@@ -1377,7 +1316,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
vport->egress.allowed_vlan =
......@@ -1411,6 +1350,41 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
return err;
}
static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
{
((u8 *)node_guid)[7] = mac[0];
((u8 *)node_guid)[6] = mac[1];
((u8 *)node_guid)[5] = mac[2];
((u8 *)node_guid)[4] = 0xff;
((u8 *)node_guid)[3] = 0xfe;
((u8 *)node_guid)[2] = mac[3];
((u8 *)node_guid)[1] = mac[4];
((u8 *)node_guid)[0] = mac[5];
}
static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
int vport_num = vport->vport;
if (!vport_num)
return;
mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport_num,
vport->info.link_state);
mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
(vport->info.vlan || vport->info.qos));
/* Only legacy mode needs ACLs */
if (esw->mode == SRIOV_LEGACY) {
esw_vport_ingress_config(esw, vport);
esw_vport_egress_config(esw, vport);
}
}
static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
int enable_events)
{
......@@ -1421,23 +1395,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
/* Only VFs need ACLs for VST and spoofchk filtering */
if (vport_num && esw->mode == SRIOV_LEGACY) {
esw_vport_ingress_config(esw, vport);
esw_vport_egress_config(esw, vport);
}
mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport_num,
MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
/* Restore old vport configuration */
esw_apply_vport_conf(esw, vport);
/* Sync with current vport context */
vport->enabled_events = enable_events;
vport->enabled = true;
/* only PF is trusted by default */
vport->trusted = (vport_num) ? false : true;
if (!vport_num)
vport->info.trusted = true;
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
......@@ -1457,11 +1425,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
vport->enabled = false;
synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport_num,
MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
......@@ -1473,7 +1436,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
*/
esw_vport_change_handle_locked(vport);
vport->enabled_events = 0;
if (vport_num && esw->mode == SRIOV_LEGACY) {
mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport_num,
MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
......@@ -1559,6 +1527,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}
void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
{
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return;
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
/* VF Vports will be enabled when SRIOV is enabled */
}
void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
{
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return;
esw_disable_vport(esw, 0);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
......@@ -1626,6 +1613,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
struct mlx5_vport *vport = &esw->vports[vport_num];
vport->vport = vport_num;
vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
vport->dev = dev;
INIT_WORK(&vport->vport_change_handler,
esw_vport_change_handler);
......@@ -1636,8 +1624,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->mode = SRIOV_NONE;
dev->priv.eswitch = esw;
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
/* VF Vports will be enabled when SRIOV is enabled */
return 0;
abort:
if (esw->work_queue)
......@@ -1656,7 +1642,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
return;
esw_info(esw->dev, "cleanup\n");
esw_disable_vport(esw, 0);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
......@@ -1689,18 +1674,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
{
((u8 *)node_guid)[7] = mac[0];
((u8 *)node_guid)[6] = mac[1];
((u8 *)node_guid)[5] = mac[2];
((u8 *)node_guid)[4] = 0xff;
((u8 *)node_guid)[3] = 0xfe;
((u8 *)node_guid)[2] = mac[3];
((u8 *)node_guid)[1] = mac[4];
((u8 *)node_guid)[0] = mac[5];
}
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
int vport, u8 mac[ETH_ALEN])
{
......@@ -1713,13 +1686,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
if (evport->spoofchk && !is_valid_ether_addr(mac)) {
if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
mlx5_core_warn(esw->dev,
"MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
vport);
return -EPERM;
err = -EPERM;
goto unlock;
}
err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
......@@ -1727,7 +1702,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
mlx5_core_warn(esw->dev,
"Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
vport, err);
return err;
goto unlock;
}
node_guid_gen_from_mac(&node_guid, mac);
......@@ -1737,9 +1712,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
"Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
vport, err);
mutex_lock(&esw->state_lock);
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
if (evport->enabled && esw->mode == SRIOV_LEGACY)
err = esw_vport_ingress_config(esw, evport);
unlock:
mutex_unlock(&esw->state_lock);
return err;
}
......@@ -1747,22 +1725,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
int vport, int link_state)
{
struct mlx5_vport *evport;
int err = 0;
if (!ESW_ALLOWED(esw))
return -EPERM;
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
return mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport, link_state);
mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
err = mlx5_modify_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport, link_state);
if (err) {
mlx5_core_warn(esw->dev,
"Failed to set vport %d link state, err = %d",
vport, err);
goto unlock;
}
evport->info.link_state = link_state;
unlock:
mutex_unlock(&esw->state_lock);
return 0;
}
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int vport, struct ifla_vf_info *ivi)
{
struct mlx5_vport *evport;
u16 vlan;
u8 qos;
if (!ESW_ALLOWED(esw))
return -EPERM;
......@@ -1774,14 +1768,14 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
memset(ivi, 0, sizeof(*ivi));
ivi->vf = vport - 1;
mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
vport);
query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
ivi->vlan = vlan;
ivi->qos = qos;
ivi->spoofchk = evport->spoofchk;
mutex_lock(&esw->state_lock);
ether_addr_copy(ivi->mac, evport->info.mac);
ivi->linkstate = evport->info.link_state;
ivi->vlan = evport->info.vlan;
ivi->qos = evport->info.qos;
ivi->spoofchk = evport->info.spoofchk;
ivi->trusted = evport->info.trusted;
mutex_unlock(&esw->state_lock);
return 0;
}
......@@ -1801,23 +1795,23 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
if (vlan || qos)
set = 1;
mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
if (err)
return err;
goto unlock;
mutex_lock(&esw->state_lock);
evport->vlan = vlan;
evport->qos = qos;
evport->info.vlan = vlan;
evport->info.qos = qos;
if (evport->enabled && esw->mode == SRIOV_LEGACY) {
err = esw_vport_ingress_config(esw, evport);
if (err)
goto out;
goto unlock;
err = esw_vport_egress_config(esw, evport);
}
out:
unlock:
mutex_unlock(&esw->state_lock);
return err;
}
......@@ -1834,16 +1828,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
evport = &esw->vports[vport];
mutex_lock(&esw->state_lock);
pschk = evport->spoofchk;
evport->spoofchk = spoofchk;
if (evport->enabled && esw->mode == SRIOV_LEGACY) {
evport = &esw->vports[vport];
pschk = evport->info.spoofchk;
evport->info.spoofchk = spoofchk;
if (evport->enabled && esw->mode == SRIOV_LEGACY)
err = esw_vport_ingress_config(esw, evport);
if (err)
evport->spoofchk = pschk;
}
if (err)
evport->info.spoofchk = pschk;
mutex_unlock(&esw->state_lock);
return err;
......@@ -1859,10 +1851,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
evport = &esw->vports[vport];
mutex_lock(&esw->state_lock);
evport->trusted = setting;
evport = &esw->vports[vport];
evport->info.trusted = setting;
if (evport->enabled)
esw_vport_change_handle_locked(evport);
mutex_unlock(&esw->state_lock);
......
......@@ -109,6 +109,16 @@ struct vport_egress {
struct mlx5_flow_rule *drop_rule;
};
struct mlx5_vport_info {
u8 mac[ETH_ALEN];
u16 vlan;
u8 qos;
u64 node_guid;
int link_state;
bool spoofchk;
bool trusted;
};
struct mlx5_vport {
struct mlx5_core_dev *dev;
int vport;
......@@ -121,10 +131,8 @@ struct mlx5_vport {
struct vport_ingress ingress;
struct vport_egress egress;
u16 vlan;
u8 qos;
bool spoofchk;
bool trusted;
struct mlx5_vport_info info;
bool enabled;
u16 enabled_events;
};
......@@ -204,6 +212,8 @@ struct mlx5_eswitch {
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
......
......@@ -277,7 +277,7 @@ static void mlx5_do_bond_work(struct work_struct *work)
bond_work);
int status;
status = mutex_trylock(&mlx5_intf_mutex);
status = mlx5_dev_list_trylock();
if (!status) {
/* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
......@@ -285,7 +285,7 @@ static void mlx5_do_bond_work(struct work_struct *work)
}
mlx5_do_bond(ldev);
mutex_unlock(&mlx5_intf_mutex);
mlx5_dev_list_unlock();
}
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
......@@ -466,35 +466,21 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
mutex_unlock(&lag_mutex);
}
static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
{
return (u16)((dev->pdev->bus->number << 8) |
PCI_SLOT(dev->pdev->devfn));
}
/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
u16 pci_id;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
(MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
return;
pci_id = mlx5_gen_pci_id(dev);
mlx5_core_for_each_priv(priv) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
if ((dev != tmp_dev) &&
(mlx5_gen_pci_id(tmp_dev) == pci_id)) {
ldev = tmp_dev->priv.lag;
break;
}
}
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
ldev = tmp_dev->priv.lag;
if (!ldev) {
ldev = mlx5_lag_dev_alloc();
......
......@@ -72,17 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
static LIST_HEAD(intf_list);
LIST_HEAD(mlx5_dev_list);
DEFINE_MUTEX(mlx5_intf_mutex);
struct mlx5_device_context {
struct list_head list;
struct mlx5_interface *intf;
void *context;
};
enum {
MLX5_ATOMIC_REQ_MODE_BE = 0x0,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
......@@ -778,147 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -ENOTSUPP;
}
static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
if (!mlx5_lag_intf_add(intf, priv))
return;
dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
if (!dev_ctx)
return;
dev_ctx->intf = intf;
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irq(&priv->ctx_lock);
} else {
kfree(dev_ctx);
}
}
static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf == intf) {
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
intf->remove(dev, dev_ctx->context);
kfree(dev_ctx);
return;
}
}
static int mlx5_register_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&priv->dev_list, &mlx5_dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
static void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_remove_device(intf, priv);
list_del(&priv->dev_list);
mutex_unlock(&mlx5_intf_mutex);
}
int mlx5_register_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
if (!intf->add || !intf->remove)
return -EINVAL;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&intf->list, &intf_list);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);
void mlx5_unregister_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_remove_device(intf, priv);
list_del(&intf->list);
mutex_unlock(&mlx5_intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
void *result = NULL;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
if ((dev_ctx->intf->protocol == protocol) &&
dev_ctx->intf->get_dev) {
result = dev_ctx->intf->get_dev(dev_ctx->context);
break;
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_add_device(intf, &dev->priv);
break;
}
}
/* Must be called with intf_mutex held */
void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_remove_device(intf, &dev->priv);
break;
}
}
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
......@@ -991,8 +839,102 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
debugfs_remove(priv->dbg_root);
}
#define MLX5_IB_MOD "mlx5_ib"
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
struct pci_dev *pdev = dev->pdev;
int err;
err = mlx5_query_hca_caps(dev);
if (err) {
dev_err(&pdev->dev, "query hca failed\n");
goto out;
}
err = mlx5_query_board_id(dev);
if (err) {
dev_err(&pdev->dev, "query board id failed\n");
goto out;
}
err = mlx5_eq_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
goto out;
}
MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
err = mlx5_init_cq_table(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize cq table\n");
goto err_eq_cleanup;
}
mlx5_init_qp_table(dev);
mlx5_init_srq_table(dev);
mlx5_init_mkey_table(dev);
err = mlx5_init_rl_table(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init rate limiting\n");
goto err_tables_cleanup;
}
#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_init(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
goto err_rl_cleanup;
}
#endif
err = mlx5_sriov_init(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
goto err_eswitch_cleanup;
}
return 0;
err_eswitch_cleanup:
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
err_rl_cleanup:
#endif
mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cleanup_cq_table(dev);
err_eq_cleanup:
mlx5_eq_cleanup(dev);
out:
return err;
}
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_sriov_cleanup(dev);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
#endif
mlx5_cleanup_rl_table(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cleanup_cq_table(dev);
mlx5_eq_cleanup(dev);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
bool boot)
{
struct pci_dev *pdev = dev->pdev;
int err;
......@@ -1025,12 +967,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out_err;
}
mlx5_pagealloc_init(dev);
err = mlx5_core_enable_hca(dev, 0);
if (err) {
dev_err(&pdev->dev, "enable hca failed\n");
goto err_pagealloc_cleanup;
goto err_cmd_cleanup;
}
err = mlx5_core_set_issi(dev);
......@@ -1083,34 +1023,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_start_health_poll(dev);
err = mlx5_query_hca_caps(dev);
if (err) {
dev_err(&pdev->dev, "query hca failed\n");
goto err_stop_poll;
}
err = mlx5_query_board_id(dev);
if (err) {
dev_err(&pdev->dev, "query board id failed\n");
if (boot && mlx5_init_once(dev, priv)) {
dev_err(&pdev->dev, "sw objs init failed\n");
goto err_stop_poll;
}
err = mlx5_enable_msix(dev);
if (err) {
dev_err(&pdev->dev, "enable msix failed\n");
goto err_stop_poll;
}
err = mlx5_eq_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
goto disable_msix;
goto err_cleanup_once;
}
err = mlx5_alloc_uuars(dev, &priv->uuari);
if (err) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
goto err_eq_cleanup;
goto err_disable_msix;
}
err = mlx5_start_eqs(dev);
......@@ -1126,15 +1053,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
}
err = mlx5_irq_set_affinity_hints(dev);
if (err)
if (err) {
dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
mlx5_init_cq_table(dev);
mlx5_init_qp_table(dev);
mlx5_init_srq_table(dev);
mlx5_init_mkey_table(dev);
goto err_affinity_hints;
}
err = mlx5_init_fs(dev);
if (err) {
......@@ -1142,36 +1064,26 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_fs;
}
err = mlx5_init_rl_table(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init rate limiting\n");
goto err_rl;
}
#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_init(dev);
if (err) {
dev_err(&pdev->dev, "eswitch init failed %d\n", err);
goto err_reg_dev;
}
mlx5_eswitch_attach(dev->priv.eswitch);
#endif
err = mlx5_sriov_init(dev);
err = mlx5_sriov_attach(dev);
if (err) {
dev_err(&pdev->dev, "sriov init failed %d\n", err);
goto err_sriov;
}
err = mlx5_register_device(dev);
if (err) {
dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
goto err_reg_dev;
if (mlx5_device_registered(dev)) {
mlx5_attach_device(dev);
} else {
err = mlx5_register_device(dev);
if (err) {
dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
goto err_reg_dev;
}
}
err = request_module_nowait(MLX5_IB_MOD);
if (err)
pr_info("failed request module on %s\n", MLX5_IB_MOD);
clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
out:
......@@ -1179,23 +1091,19 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
return 0;
err_sriov:
if (mlx5_sriov_cleanup(dev))
dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
err_reg_dev:
mlx5_sriov_detach(dev);
err_sriov:
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
mlx5_eswitch_detach(dev->priv.eswitch);
#endif
err_reg_dev:
mlx5_cleanup_rl_table(dev);
err_rl:
mlx5_cleanup_fs(dev);
err_fs:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cleanup_cq_table(dev);
mlx5_irq_clear_affinity_hints(dev);
err_affinity_hints:
free_comp_eqs(dev);
err_stop_eqs:
......@@ -1204,12 +1112,13 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
err_free_uar:
mlx5_free_uuars(dev, &priv->uuari);
err_eq_cleanup:
mlx5_eq_cleanup(dev);
disable_msix:
err_disable_msix:
mlx5_disable_msix(dev);
err_cleanup_once:
if (boot)
mlx5_cleanup_once(dev);
err_stop_poll:
mlx5_stop_health_poll(dev);
if (mlx5_cmd_teardown_hca(dev)) {
......@@ -1226,8 +1135,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
err_disable_hca:
mlx5_core_disable_hca(dev, 0);
err_pagealloc_cleanup:
mlx5_pagealloc_cleanup(dev);
err_cmd_cleanup:
mlx5_cmd_cleanup(dev);
out_err:
......@@ -1237,40 +1145,35 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
return err;
}
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
bool cleanup)
{
int err = 0;
err = mlx5_sriov_cleanup(dev);
if (err) {
dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
__func__);
return err;
}
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
__func__);
if (cleanup)
mlx5_cleanup_once(dev);
goto out;
}
mlx5_unregister_device(dev);
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
mlx5_sriov_detach(dev);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
mlx5_eswitch_detach(dev->priv.eswitch);
#endif
mlx5_cleanup_rl_table(dev);
mlx5_cleanup_fs(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cleanup_cq_table(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
mlx5_free_uuars(dev, &priv->uuari);
mlx5_eq_cleanup(dev);
mlx5_disable_msix(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev);
err = mlx5_cmd_teardown_hca(dev);
if (err) {
......@@ -1280,7 +1183,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
mlx5_pagealloc_cleanup(dev);
mlx5_cmd_cleanup(dev);
out:
......@@ -1290,22 +1192,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
return err;
}
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
struct mlx5_core_event_handler {
void (*event)(struct mlx5_core_dev *dev,
enum mlx5_dev_event event,
......@@ -1319,6 +1205,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
#endif
};
#define MLX5_IB_MOD "mlx5_ib"
static int init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
......@@ -1365,12 +1252,18 @@ static int init_one(struct pci_dev *pdev,
goto close_pci;
}
err = mlx5_load_one(dev, priv);
mlx5_pagealloc_init(dev);
err = mlx5_load_one(dev, priv, true);
if (err) {
dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
goto clean_health;
}
err = request_module_nowait(MLX5_IB_MOD);
if (err)
pr_info("failed request module on %s\n", MLX5_IB_MOD);
err = devlink_register(devlink, &pdev->dev);
if (err)
goto clean_load;
......@@ -1378,8 +1271,9 @@ static int init_one(struct pci_dev *pdev,
return 0;
clean_load:
mlx5_unload_one(dev, priv);
mlx5_unload_one(dev, priv, true);
clean_health:
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
......@@ -1397,11 +1291,15 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_priv *priv = &dev->priv;
devlink_unregister(devlink);
if (mlx5_unload_one(dev, priv)) {
mlx5_unregister_device(dev);
if (mlx5_unload_one(dev, priv, true)) {
dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
mlx5_health_cleanup(dev);
return;
}
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
pci_set_drvdata(pdev, NULL);
......@@ -1416,7 +1314,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
dev_info(&pdev->dev, "%s was called\n", __func__);
mlx5_enter_error_state(dev);
mlx5_unload_one(dev, priv);
mlx5_unload_one(dev, priv, false);
pci_save_state(pdev);
mlx5_pci_disable_device(dev);
return state == pci_channel_io_perm_failure ?
......@@ -1488,7 +1386,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
dev_info(&pdev->dev, "%s was called\n", __func__);
err = mlx5_load_one(dev, priv);
err = mlx5_load_one(dev, priv, false);
if (err)
dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
, __func__, err);
......@@ -1510,7 +1408,7 @@ static void shutdown(struct pci_dev *pdev)
dev_info(&pdev->dev, "Shutdown was called\n");
/* Notify mlx5 clients that the kernel is being shut down */
set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
mlx5_unload_one(dev, priv);
mlx5_unload_one(dev, priv, false);
mlx5_pci_disable_device(dev);
}
......
......@@ -46,9 +46,6 @@
extern int mlx5_core_debug_mask;
extern struct list_head mlx5_dev_list;
extern struct mutex mlx5_intf_mutex;
#define mlx5_core_dbg(__dev, format, ...) \
dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \
(__dev)->priv.name, __func__, __LINE__, current->pid, \
......@@ -73,9 +70,6 @@ do { \
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
#define mlx5_core_for_each_priv(__priv) \
list_for_each_entry(__priv, &mlx5_dev_list, dev_list)
enum {
MLX5_CMD_DATA, /* print command payload only */
MLX5_CMD_TIME, /* print command execution time */
......@@ -89,6 +83,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_enter_error_state(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
......@@ -102,8 +100,19 @@ void mlx5_cq_tasklet_cb(unsigned long data);
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
void mlx5_detach_device(struct mlx5_core_dev *dev);
bool mlx5_device_registered(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
......
......@@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
{
struct fw_page *fwp;
struct rb_node *p;
u32 func_id;
u32 npages;
u32 i = 0;
......@@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);
func_id = MLX5_GET(manage_pages_in, in, function_id);
p = rb_first(&dev->priv.page_root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
p = rb_next(p);
if (fwp->func_id != func_id)
continue;
MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
i++;
}
......@@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
int prev_vfs_pages = dev->priv.vfs_pages;
/* In case of internal error we will free the pages manually later */
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mlx5_core_warn(dev, "Skipping wait for vf pages stage");
return 0;
}
mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
dev->priv.name);
while (dev->priv.vfs_pages) {
......
......@@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return !!sriov->num_vfs;
}
static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
for (vf = 1; vf <= num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf);
if (sriov->enabled_vfs) {
mlx5_core_warn(dev,
"failed to enable SRIOV on device, already enabled with %d vfs\n",
sriov->enabled_vfs);
return -EBUSY;
}
#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
return err;
}
#endif
for (vf = 0; vf < num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
} else {
sriov->vfs_ctx[vf - 1].enabled = 1;
mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
continue;
}
sriov->vfs_ctx[vf].enabled = 1;
sriov->enabled_vfs++;
mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
}
return 0;
}
static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
for (vf = 1; vf <= num_vfs; vf++) {
if (sriov->vfs_ctx[vf - 1].enabled) {
if (mlx5_core_disable_hca(dev, vf))
mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
else
sriov->vfs_ctx[vf - 1].enabled = 0;
if (!sriov->enabled_vfs)
return;
for (vf = 0; vf < sriov->num_vfs; vf++) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
err = mlx5_core_disable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
continue;
}
sriov->vfs_ctx[vf].enabled = 0;
sriov->enabled_vfs--;
}
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
#endif
if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
}
static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
if (pci_num_vf(pdev))
pci_disable_sriov(pdev);
enable_vfs(dev, num_vfs);
int err = 0;
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
goto ex;
if (pci_num_vf(pdev)) {
mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
return -EBUSY;
}
return 0;
err = pci_enable_sriov(pdev, num_vfs);
if (err)
mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
ex:
disable_vfs(dev, num_vfs);
return err;
}
static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
{
pci_disable_sriov(pdev);
}
static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int err = 0;
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC);
if (!sriov->vfs_ctx)
return -ENOMEM;
err = mlx5_device_enable_sriov(dev, num_vfs);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
}
sriov->enabled_vfs = num_vfs;
err = mlx5_core_create_vfs(pdev, num_vfs);
err = mlx5_pci_enable_sriov(pdev, num_vfs);
if (err) {
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = NULL;
mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
mlx5_device_disable_sriov(dev);
return err;
}
sriov->num_vfs = num_vfs;
return 0;
}
static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
static void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
sriov->num_vfs = num_vfs;
}
static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov;
sriov = &dev->priv.sriov;
disable_vfs(dev, sriov->num_vfs);
if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout claiming VFs pages\n");
mlx5_pci_disable_sriov(pdev);
mlx5_device_disable_sriov(dev);
sriov->num_vfs = 0;
}
int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int err = 0;
mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
if (!mlx5_core_is_pf(dev))
......@@ -156,92 +180,57 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
return -EINVAL;
}
mlx5_core_cleanup_vfs(dev);
if (num_vfs)
err = mlx5_sriov_enable(pdev, num_vfs);
else
mlx5_sriov_disable(pdev);
if (!num_vfs) {
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
#endif
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = NULL;
if (!pci_vfs_assigned(pdev))
pci_disable_sriov(pdev);
else
mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n");
return 0;
}
return err ? err : num_vfs;
}
err = mlx5_core_sriov_enable(pdev, num_vfs);
if (err) {
mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err);
return err;
}
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
mlx5_core_init_vfs(dev, num_vfs);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
#endif
if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
return 0;
return num_vfs;
/* If sriov VFs exist in PCI level, enable them in device level */
return mlx5_device_enable_sriov(dev, sriov->num_vfs);
}
static int sync_required(struct pci_dev *pdev)
void mlx5_sriov_detach(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int cur_vfs = pci_num_vf(pdev);
if (cur_vfs != sriov->num_vfs) {
mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n",
cur_vfs, sriov->num_vfs);
return 1;
}
if (!mlx5_core_is_pf(dev))
return;
return 0;
mlx5_device_disable_sriov(dev);
}
int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
struct pci_dev *pdev = dev->pdev;
int cur_vfs;
int total_vfs;
if (!mlx5_core_is_pf(dev))
return 0;
if (!sync_required(dev->pdev))
return 0;
cur_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
total_vfs = pci_sriov_get_totalvfs(pdev);
sriov->num_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
if (!sriov->vfs_ctx)
return -ENOMEM;
sriov->enabled_vfs = cur_vfs;
mlx5_core_init_vfs(dev, cur_vfs);
#ifdef CONFIG_MLX5_CORE_EN
if (cur_vfs)
mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs,
SRIOV_LEGACY);
#endif
enable_vfs(dev, cur_vfs);
return 0;
}
int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
{
struct pci_dev *pdev = dev->pdev;
int err;
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
if (!mlx5_core_is_pf(dev))
return 0;
return;
err = mlx5_core_sriov_configure(pdev, 0);
if (err)
return err;
return 0;
kfree(sriov->vfs_ctx);
}
......@@ -828,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
int mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
s32 npages);
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
......@@ -932,6 +930,8 @@ enum {
struct mlx5_interface {
void * (*add)(struct mlx5_core_dev *dev);
void (*remove)(struct mlx5_core_dev *dev, void *context);
int (*attach)(struct mlx5_core_dev *dev, void *context);
void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param);
void * (*get_dev)(void *context);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册